Coverage for tests/test_query_relations.py: 26%

52 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-10 10:14 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28import difflib 

29import os 

30import os.path 

31import re 

32import unittest 

33 

34from lsst.daf.butler.registry import MissingSpatialOverlapError, RegistryConfig, _RegistryFactory 

35from lsst.daf.butler.registry.queries import ( 

36 DataCoordinateQueryResults, 

37 DatasetQueryResults, 

38 DimensionRecordQueryResults, 

39) 

40from lsst.daf.butler.transfers import YamlRepoImportBackend 

41 

# Absolute path of the directory containing this test module; test data files
# are located relative to it.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

43 

44 

class TestQueryRelationsTests(unittest.TestCase):
    """Tests for registry queries that check that the generated relation tree
    matches expectations.

    These tests are somewhat fragile - there are multiple valid relation trees
    for most registry queries, just as there are multiple valid SQL queries,
    and since we check the relation tree via string comparisons we are
    also sensitive to irrelevant things like column ordering.  But these
    differences are deterministic, and checking the relation trees instead of
    the query results puts a much smaller burden on test-data creation and
    inspection (as well as making tests go faster), making it much easier to
    test many combinations of arguments.

    Note that daf_relation provides good test coverage of the process of going
    from relation trees to SQL.
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Create the shared registry, load the test data, and record the
        dimension values that the tests use in their data IDs.
        """
        config = RegistryConfig()
        config["db"] = "sqlite://"
        cls.registry = _RegistryFactory(config).create_from_config()
        # We need just enough test data to have valid dimension records for
        # all of the dimensions we're concerned with, and we want to pick
        # values for each dimension that correspond to a spatiotemporal
        # overlap.  Without that, we'd be fighting optimizations built into the
        # query system that simplify things as soon as it can spot that there
        # will be no overall results.
        data_file = os.path.normpath(os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        with open(data_file) as stream:
            backend = YamlRepoImportBackend(stream, cls.registry)
        backend.register()
        backend.load(datastore=None)
        assert (
            cls.registry.dimensions.commonSkyPix.name == "htm7"
        ), "If this changes, update the skypix levels below to have one below and one above."
        cls.htm7 = 222340
        cls.htm11 = 56919188
        cls.instrument = "HSC"
        cls.skymap = "hsc_rings_v1"
        cls.visit = 404
        cls.tract = 9615
        cls.detector = 0
        cls.patch = 14
        cls.data_id = cls.registry.expandDataId(
            htm7=cls.htm7,
            htm11=cls.htm11,
            instrument=cls.instrument,
            skymap=cls.skymap,
            visit=cls.visit,
            tract=cls.tract,
            detector=cls.detector,
            patch=cls.patch,
        )
        # Values implied by the explicit dimensions above, read back from the
        # expanded data ID so the expected strings below can include them.
        cls.day_obs = cls.data_id["day_obs"]
        cls.band = cls.data_id["band"]
        cls.physical_filter = cls.data_id["physical_filter"]

    def assert_relation_str(
        self,
        expected: str,
        *results: DataCoordinateQueryResults | DimensionRecordQueryResults | DatasetQueryResults,
    ) -> None:
        """Assert that one or more registry queries have relation trees that
        match the given string.

        Parameters
        ----------
        expected : `str`
            Expected relation tree, corresponding to
            ``lsst.daf.relation.Relation.__str__`` (which is much more concise
            and readable than the `repr` version, once you get used to it).
            Any newlines and indentation will be stripped.
        *results
            Query result objects returned by queryDataIds,
            queryDimensionRecords, or queryDatasets.

        Raises
        ------
        AssertionError
            Raised for the first query whose relation string differs from
            the normalized ``expected`` string; the message includes a
            `difflib` comparison of the two.
        """
        # Drop newlines and leading/trailing space.
        expected = expected.replace("\n", " ").strip()
        # Drop duplicate spaces (i.e. indentation).
        expected = re.sub(r" \s+", " ", expected)
        # Drop spaces next to parentheses and square brackets.
        expected = re.sub(r"\s*(\[|\(|\)|\])\s*", r"\1", expected)
        differ = difflib.Differ()
        for n, result in enumerate(results):
            result_str = str(result._query.relation)
            if expected != result_str:
                message_lines = [f"Unexpected relation string for query {n}:"]
                message_lines.extend(
                    differ.compare(
                        [expected],
                        [result_str],
                    )
                )
                raise AssertionError("\n".join(message_lines))

    def test_spatial_constraints(self) -> None:
        """Test query constraints from data IDs and WHERE clauses that imply a
        spatial region.
        """
        # The fully-expanded visit+detector data ID expressed as a 'where'
        # string.  The terms are joined programmatically so every 'and' is
        # guaranteed to be separated from its neighbors by whitespace, rather
        # than relying on hand-placed spaces in adjacent string literals.
        visit_detector_where = " and ".join(
            [
                f"instrument={self.instrument!r}",
                f"detector={self.detector!r}",
                f"visit={self.visit!r}",
                f"band={self.band!r}",
                f"day_obs={self.day_obs!r}",
                f"physical_filter={self.physical_filter!r}",
            ]
        )
        # Constrain one set of regular spatial dimensions from another.
        # This needs post-query filtering in the iteration engine.
        self.assert_relation_str(
            f"""
            Π[band, patch, skymap, tract](
                σ[regions_overlap(patch.region, visit_detector_region.region)](
                    →[iteration](
                        select(
                            Π[band, patch, patch.region, skymap, tract, visit_detector_region.region](
                                σ[
                                    instrument={self.instrument!r}
                                    and detector={self.detector!r}
                                    and visit={self.visit!r}
                                    and band={self.band!r}
                                    and day_obs={self.day_obs!r}
                                    and physical_filter={self.physical_filter!r}
                                ](
                                    patch_htm7_overlap
                                    ⋈ visit_detector_region_htm7_overlap
                                    ⋈ physical_filter
                                    ⋈ patch
                                    ⋈ visit
                                    ⋈ visit_detector_region
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["patch", "band"], instrument=self.instrument, visit=self.visit, detector=self.detector
            ),
            self.registry.queryDataIds(["patch", "band"], where=visit_detector_where),
        )
        # Constrain the special common skypix dimension from a regular
        # dimension.  This does not need any post-query filtering.
        self.assert_relation_str(
            # It would be better if this query didn't join in visit and
            # physical_filter - it does that to ensure all implied dimension
            # relationships are satisfied in the results, but the dimensions
            # implied by visit are not present in the results and play no role
            # in the constraints.  But it'd be hard to fix that and any fix
            # would be very rarely exercised.
            f"""
            select(
                Π[htm7](
                    σ[
                        instrument={self.instrument!r}
                        and detector={self.detector!r}
                        and visit={self.visit!r}
                        and band={self.band!r}
                        and day_obs={self.day_obs!r}
                        and physical_filter={self.physical_filter!r}
                    ](
                        visit_detector_region_htm7_overlap
                        ⋈ physical_filter
                        ⋈ visit
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["htm7"], instrument=self.instrument, visit=self.visit, detector=self.detector
            ),
            # For regular dimension constraints we can also support having the
            # data ID expressed as a 'where' expression.  The query would also
            # have the same behavior with only visit and detector specified
            # in the 'where' string, but it'd change the expected string.
            self.registry.queryDataIds(["htm7"], where=visit_detector_where),
        )
        # We can't constrain any other skypix system spatially, because we
        # don't have overlap rows for those in the database.  But in the future
        # we might be able to fake it with an iteration-engine spatial join, or
        # utilize explicitly-materialized overlaps.
        with self.assertRaises(MissingSpatialOverlapError):
            self.registry.queryDataIds(
                ["htm11"],
                instrument=self.instrument,
                visit=self.visit,
                detector=self.detector,
            )
        # Constrain a regular spatial dimension (patch) from a non-common
        # skypix dimension.  In general this requires post-query
        # filtering to get only the patches that overlap the skypix pixel.  We
        # could special-case skypix dimensions that are coarser than the common
        # dimension and part of the same system to simplify both the SQL query
        # and avoid post-query filtering, but we don't at present.
        self.assert_relation_str(
            f"""
            Π[patch, skymap, tract](
                σ[
                    regions_overlap(
                        patch.region,
                        {self.registry.dimensions["htm11"].pixelization.pixel(self.htm11)}
                    )
                ](
                    →[iteration](
                        select(
                            Π[patch, patch.region, skymap, tract](
                                σ[htm7={self.htm7!r}](
                                    patch_htm7_overlap ⋈ patch
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(["patch"], htm11=self.htm11),
        )
        # Constrain a regular spatial dimension (patch) from the common
        # skypix dimension.  This does not require post-query filtering.
        self.assert_relation_str(
            f"""
            select(
                Π[patch, skymap, tract](
                    σ[htm7={self.htm7!r}](
                        patch_htm7_overlap
                    )
                )
            )
            """,
            self.registry.queryDataIds(["patch"], htm7=self.htm7),
        )
        # Constrain a regular dimension (detector) via a different dimension
        # (visit) that combine together to define a more fine-grained region,
        # and also constrain via a skypix dimension other than the common one.
        # Once again we could special-case this for skypix dimensions that are
        # coarser than the common dimension in the same system, but we don't.
        self.assert_relation_str(
            # This query also doesn't need visit or physical_filter joined in,
            # but we can live with that.
            f"""
            Π[detector, instrument](
                σ[
                    regions_overlap(
                        visit_detector_region.region,
                        {self.registry.dimensions["htm11"].pixelization.pixel(self.htm11)}
                    )
                ](
                    →[iteration](
                        select(
                            Π[detector, instrument, visit_detector_region.region](
                                σ[
                                    instrument={self.instrument!r}
                                    and visit={self.visit!r}
                                    and band={self.band!r}
                                    and day_obs={self.day_obs!r}
                                    and physical_filter={self.physical_filter!r}
                                    and htm7={self.htm7!r}
                                ](
                                    visit_detector_region_htm7_overlap
                                    ⋈ physical_filter
                                    ⋈ visit
                                    ⋈ visit_detector_region
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["detector"], visit=self.visit, instrument=self.instrument, htm11=self.htm11
            ),
        )
        # Constrain a regular dimension (detector) via a different dimension
        # (visit) that combine together to define a more fine-grained region,
        # and also constrain via the common-skypix system.
        self.assert_relation_str(
            # This query also doesn't need visit or physical_filter joined in,
            # but we can live with that.
            f"""
            select(
                Π[detector, instrument](
                    σ[
                        htm7={self.htm7!r}
                        and instrument={self.instrument!r}
                        and visit={self.visit!r}
                        and band={self.band!r}
                        and day_obs={self.day_obs!r}
                        and physical_filter={self.physical_filter!r}
                    ](
                        visit_detector_region_htm7_overlap
                        ⋈ physical_filter
                        ⋈ visit
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["detector"], visit=self.visit, instrument=self.instrument, htm7=self.htm7
            ),
        )

360 

361 

# Run the tests in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()