Coverage for tests/test_query_relations.py: 22%
50 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-06 09:33 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-06 09:33 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22import difflib
23import os
24import os.path
25import re
26import unittest
28from lsst.daf.butler.registry import MissingSpatialOverlapError, Registry, RegistryConfig, queries
29from lsst.daf.butler.transfers import YamlRepoImportBackend
# Absolute path of the directory containing this test module; test data files
# are located relative to it so the tests do not depend on the working
# directory they are launched from.
TESTDIR = os.path.dirname(os.path.abspath(__file__))
class TestQueryRelationsTests(unittest.TestCase):
    """Tests for registry queries that check that the generated relation tree
    matches expectations.

    These tests are somewhat fragile - there are multiple valid relation trees
    for most registry queries, just as there are multiple valid SQL queries,
    and since we check the relation tree via string comparisons we are
    also sensitive to irrelevant things like column ordering.  But these
    differences are deterministic, and checking the relation trees instead of
    the query results puts a much smaller burden on test-data creation and
    inspection (as well as making tests go faster), making it much easier to
    test many combinations of arguments.

    Note that daf_relation provides good test coverage of the process of going
    from relation trees to SQL.
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Create an in-memory SQLite registry, load the test data export
        into it, and record the data ID values used by the tests.
        """
        config = RegistryConfig()
        config["db"] = "sqlite://"
        cls.registry = Registry.createFromConfig(config)
        # We need just enough test data to have valid dimension records for
        # all of the dimensions we're concerned with, and we want to pick
        # values for each dimension that correspond to a spatiotemporal
        # overlap.  Without that, we'd be fighting optimizations built into the
        # query system that simplify things as soon as it can spot that there
        # will be no overall results.
        data_file = os.path.normpath(os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Decode explicitly as UTF-8 so that reading the YAML export does not
        # depend on the locale's default encoding.
        with open(data_file, "r", encoding="utf-8") as stream:
            backend = YamlRepoImportBackend(stream, cls.registry)
            # The stream is kept open until loading completes; this is safe
            # whether or not the backend reads from it lazily.
            backend.register()
            backend.load(datastore=None)
        # Raise explicitly rather than using a bare ``assert`` so that this
        # guard is not stripped when Python runs with ``-O``.
        if cls.registry.dimensions.commonSkyPix.name != "htm7":
            raise AssertionError(
                "If this changes, update the skypix levels below to have one below and one above."
            )
        cls.htm7 = 222340
        cls.htm11 = 56919188
        cls.instrument = "HSC"
        cls.skymap = "hsc_rings_v1"
        cls.visit = 404
        cls.tract = 9615
        cls.detector = 0
        cls.patch = 14
        cls.data_id = cls.registry.expandDataId(
            htm7=cls.htm7,
            htm11=cls.htm11,
            instrument=cls.instrument,
            skymap=cls.skymap,
            visit=cls.visit,
            tract=cls.tract,
            detector=cls.detector,
            patch=cls.patch,
        )
        # These two values are taken from the expanded data ID rather than
        # being hard-coded like the ones above.
        cls.band = cls.data_id["band"]
        cls.physical_filter = cls.data_id["physical_filter"]

    @staticmethod
    def _normalize_relation_str(text: str) -> str:
        """Collapse a multi-line, indented relation string into the compact
        single-line form that it is compared against.

        Parameters
        ----------
        text : `str`
            Relation tree string, possibly containing newlines and
            indentation.

        Returns
        -------
        normalized : `str`
            ``text`` with newlines replaced by spaces, leading and trailing
            whitespace removed, runs of whitespace collapsed, and whitespace
            adjacent to parentheses and square brackets removed.
        """
        # Drop newlines and leading/trailing space.
        text = text.replace("\n", " ").strip()
        # Drop duplicate spaces (i.e. indentation).
        text = re.sub(r" \s+", " ", text)
        # Drop spaces next to parentheses and square brackets.
        return re.sub(r"\s*(\[|\(|\)|\])\s*", r"\1", text)

    def assert_relation_str(
        self,
        expected: str,
        *results: queries.DataCoordinateQueryResults
        | queries.DimensionRecordQueryResults
        | queries.ParentDatasetQueryResults,
    ) -> None:
        """A specialized test assert that checks that one or more registry
        queries have relation trees that match the given string.

        Parameters
        ----------
        expected : `str`
            Expected relation tree, corresponding to
            ``lsst.daf.relation.Relation.__str__`` (which is much more concise
            and readable than the `repr` version, once you get used to it).
            Any newlines and indentation will be stripped.
        *results
            Query result objects returned by queryDataIds,
            queryDimensionRecords, or queryDatasets.

        Raises
        ------
        AssertionError
            Raised (via ``self.fail``, i.e. as ``self.failureException``) if
            no results are given, or on the first result whose relation string
            differs from ``expected``; in the latter case the message includes
            a `difflib` comparison of the two strings.
        """
        if not results:
            # Without this guard a call that forgot to pass any query results
            # would pass without checking anything.
            self.fail("assert_relation_str requires at least one query result to check.")
        expected = self._normalize_relation_str(expected)
        differ = difflib.Differ()
        for n, result in enumerate(results):
            result_str = str(result._query.relation)
            if expected != result_str:
                message_lines = [f"Unexpected relation string for query {n}:"]
                message_lines.extend(differ.compare([expected], [result_str]))
                self.fail("\n".join(message_lines))

    def test_spatial_constraints(self) -> None:
        """Test query constraints from data IDs and WHERE clauses that imply a
        spatial region.
        """
        # The same constraints as the instrument/visit/detector data ID,
        # spelled out as a 'where' expression; shared by the first two checks.
        visit_detector_where = (
            f"band={self.band!r} "
            f"and instrument={self.instrument!r} "
            f"and detector={self.detector!r} "
            f"and physical_filter={self.physical_filter!r} "
            f"and visit={self.visit!r}"
        )
        # Constrain one set of regular spatial dimensions from another.
        # This needs post-query filtering in the iteration engine.
        self.assert_relation_str(
            f"""
            Π[band, patch, skymap, tract](
                σ[regions_overlap(patch.region, visit_detector_region.region)](
                    →[iteration](
                        select(
                            Π[band, patch, patch.region, skymap, tract, visit_detector_region.region](
                                σ[
                                    band={self.band!r}
                                    and instrument={self.instrument!r}
                                    and detector={self.detector!r}
                                    and physical_filter={self.physical_filter!r}
                                    and visit={self.visit!r}
                                ](
                                    patch_htm7_overlap
                                    ⋈ visit_detector_region_htm7_overlap
                                    ⋈ physical_filter
                                    ⋈ patch
                                    ⋈ visit
                                    ⋈ visit_detector_region
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["patch", "band"], instrument=self.instrument, visit=self.visit, detector=self.detector
            ),
            self.registry.queryDataIds(["patch", "band"], where=visit_detector_where),
        )
        # Constrain the special common skypix dimension from a regular
        # dimension.  This does not need any post-query filtering.
        self.assert_relation_str(
            # It would be better if this query didn't join in visit and
            # physical_filter - it does that to ensure all implied dimension
            # relationships are satisfied in the results, but the dimensions
            # implied by visit are not present in the results and play no role
            # in the constraints.  But it'd be hard to fix that and any fix
            # would be very rarely exercised.
            f"""
            select(
                Π[htm7](
                    σ[
                        band={self.band!r}
                        and instrument={self.instrument!r}
                        and detector={self.detector!r}
                        and physical_filter={self.physical_filter!r}
                        and visit={self.visit!r}
                    ](
                        visit_detector_region_htm7_overlap
                        ⋈ physical_filter
                        ⋈ visit
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["htm7"], instrument=self.instrument, visit=self.visit, detector=self.detector
            ),
            # For regular dimension constraints we can also support having the
            # data ID expressed as a 'where' expression.  The query would also
            # have the same behavior with only visit and detector specified
            # in the 'where' string, but it'd change the expected string.
            self.registry.queryDataIds(["htm7"], where=visit_detector_where),
        )
        # We can't constrain any other skypix system spatially, because we
        # don't have overlap rows for those in the database.  But in the future
        # we might be able to fake it with an iteration-engine spatial join, or
        # utilize explicitly-materialized overlaps.
        with self.assertRaises(MissingSpatialOverlapError):
            self.registry.queryDataIds(
                ["htm11"],
                instrument=self.instrument,
                visit=self.visit,
                detector=self.detector,
            )
        # Region of the htm11 pixel, interpolated into the expected strings of
        # the two checks below that constrain via htm11.
        htm11_region = self.registry.dimensions["htm11"].pixelization.pixel(self.htm11)
        # Constrain a regular spatial dimension (patch) from a non-common
        # skypix dimension.  In general this requires post-query filtering to
        # get only the patches that overlap the skypix pixel.  We could
        # special-case skypix dimensions that are coarser than the common
        # dimension and part of the same system to simplify both the SQL query
        # and avoid post-query filtering, but we don't at present.
        self.assert_relation_str(
            f"""
            Π[patch, skymap, tract](
                σ[
                    regions_overlap(
                        patch.region,
                        {htm11_region}
                    )
                ](
                    →[iteration](
                        select(
                            Π[patch, patch.region, skymap, tract](
                                σ[htm7={self.htm7!r}](
                                    patch_htm7_overlap ⋈ patch
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(["patch"], htm11=self.htm11),
        )
        # Constrain a regular spatial dimension (patch) from the common
        # skypix dimension.  This does not require post-query filtering.
        self.assert_relation_str(
            f"""
            select(
                Π[patch, skymap, tract](
                    σ[htm7={self.htm7!r}](
                        patch_htm7_overlap
                    )
                )
            )
            """,
            self.registry.queryDataIds(["patch"], htm7=self.htm7),
        )
        # Constrain a regular dimension (detector) via a different dimension
        # (visit) that combine together to define a more fine-grained region,
        # and also constrain via a skypix dimension other than the common one.
        # Once again we could special-case this for skypix dimensions that are
        # coarser than the common dimension in the same system, but we don't.
        self.assert_relation_str(
            # This query also doesn't need visit or physical_filter joined in,
            # but we can live with that.
            f"""
            Π[detector, instrument](
                σ[
                    regions_overlap(
                        visit_detector_region.region,
                        {htm11_region}
                    )
                ](
                    →[iteration](
                        select(
                            Π[detector, instrument, visit_detector_region.region](
                                σ[
                                    band={self.band!r}
                                    and instrument={self.instrument!r}
                                    and physical_filter={self.physical_filter!r}
                                    and visit={self.visit!r}
                                    and htm7={self.htm7!r}
                                ](
                                    visit_detector_region_htm7_overlap
                                    ⋈ physical_filter
                                    ⋈ visit
                                    ⋈ visit_detector_region
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["detector"], visit=self.visit, instrument=self.instrument, htm11=self.htm11
            ),
        )
        # Constrain a regular dimension (detector) via a different dimension
        # (visit) that combine together to define a more fine-grained region,
        # and also constrain via the common-skypix system.
        self.assert_relation_str(
            # This query also doesn't need visit or physical_filter joined in,
            # but we can live with that.
            f"""
            select(
                Π[detector, instrument](
                    σ[
                        band={self.band!r}
                        and htm7={self.htm7!r}
                        and instrument={self.instrument!r}
                        and physical_filter={self.physical_filter!r}
                        and visit={self.visit!r}
                    ](
                        visit_detector_region_htm7_overlap
                        ⋈ physical_filter
                        ⋈ visit
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["detector"], visit=self.visit, instrument=self.instrument, htm7=self.htm7
            ),
        )
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()