Coverage for tests/test_query_relations.py: 26%
52 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-30 02:51 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-30 02:51 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28import difflib
29import os
30import os.path
31import re
32import unittest
34from lsst.daf.butler import DataCoordinateQueryResults, DatasetRefQueryResults, DimensionRecordQueryResults
35from lsst.daf.butler.registry import MissingSpatialOverlapError, RegistryConfig, _RegistryFactory
36from lsst.daf.butler.transfers import YamlRepoImportBackend
# Absolute path of the directory that contains this test module; test data
# files are located relative to it.
TESTDIR = os.path.dirname(os.path.abspath(__file__))
class TestQueryRelationsTests(unittest.TestCase):
    """Tests for registry queries that check that the generated relation tree
    matches expectations.

    These tests are somewhat fragile - there are multiple valid relation trees
    for most registry queries, just as there are multiple valid SQL queries,
    and since we check the relation tree via string comparisons we are
    also sensitive to irrelevant things like column ordering.  But these
    differences are deterministic, and checking the relation trees instead of
    the query results puts a much smaller burden on test-data creation and
    inspection (as well as making tests go faster), making it much easier to
    test many combinations of arguments.

    Note that daf_relation provides good test coverage of the process of going
    from relation trees to SQL.
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Create an in-memory registry, load the test data, and record the
        dimension values shared by all tests as class attributes.
        """
        config = RegistryConfig()
        config["db"] = "sqlite://"
        cls.registry = _RegistryFactory(config).create_from_config()
        # We need just enough test data to have valid dimension records for
        # all of the dimensions we're concerned with, and we want to pick
        # values for each dimension that correspond to a spatiotemporal
        # overlap.  Without that, we'd be fighting optimizations built into the
        # query system that simplify things as soon as it can spot that there
        # will be no overall results.
        data_file = os.path.normpath(os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        with open(data_file) as stream:
            backend = YamlRepoImportBackend(stream, cls.registry)
            # The stream is kept open until loading has finished, in case the
            # backend reads from it lazily.
            backend.register()
            backend.load(datastore=None)
        # Raise explicitly rather than using a bare ``assert`` so that the
        # check still runs when Python is started with -O.
        if cls.registry.dimensions.commonSkyPix.name != "htm7":
            raise AssertionError(
                "If this changes, update the skypix levels below to have one below and one above."
            )
        cls.htm7 = 222340
        cls.htm11 = 56919188
        cls.instrument = "HSC"
        cls.skymap = "hsc_rings_v1"
        cls.visit = 404
        cls.tract = 9615
        cls.detector = 0
        cls.patch = 14
        cls.data_id = cls.registry.expandDataId(
            htm7=cls.htm7,
            htm11=cls.htm11,
            instrument=cls.instrument,
            skymap=cls.skymap,
            visit=cls.visit,
            tract=cls.tract,
            detector=cls.detector,
            patch=cls.patch,
        )
        # Values implied by the explicit dimensions above, as reported by the
        # expanded data ID.
        cls.day_obs = cls.data_id["day_obs"]
        cls.band = cls.data_id["band"]
        cls.physical_filter = cls.data_id["physical_filter"]

    def assert_relation_str(
        self,
        expected: str,
        *results: DataCoordinateQueryResults | DimensionRecordQueryResults | DatasetRefQueryResults,
    ) -> None:
        """Assert that one or more registry queries have relation trees that
        match the given string.

        Parameters
        ----------
        expected : `str`
            Expected relation tree, corresponding to
            ``lsst.daf.relation.Relation.__str__`` (which is much more concise
            and readable than the `repr` version, once you get used to it).
            Any newlines and indentation will be stripped.
        *results
            Query result objects returned by queryDataIds,
            queryDimensionRecords, or queryDatasets.

        Raises
        ------
        AssertionError
            Raised for the first query whose relation string differs from
            ``expected``; the message includes a diff of the two strings.
        """
        # Drop newlines and leading/trailing space.
        expected = expected.replace("\n", " ").strip()
        # Drop duplicate spaces (i.e. indentation).
        expected = re.sub(r" \s+", " ", expected)
        # Drop spaces next to parentheses and square brackets.
        expected = re.sub(r"\s*(\[|\(|\)|\])\s*", r"\1", expected)
        differ = difflib.Differ()
        for n, result in enumerate(results):
            result_str = str(result._query.relation)
            if expected == result_str:
                continue
            message_lines = [f"Unexpected relation string for query {n}:"]
            message_lines.extend(differ.compare([expected], [result_str]))
            self.fail("\n".join(message_lines))

    def test_spatial_constraints(self) -> None:
        """Test query constraints from data IDs and WHERE clauses that imply a
        spatial region.
        """
        # The full data ID expressed as a 'where' expression.  The terms are
        # joined with explicit " and " separators so that no two tokens are
        # ever fused together (e.g. "visit=404and band=...").
        where = " and ".join(
            [
                f"instrument={self.instrument!r}",
                f"detector={self.detector!r}",
                f"visit={self.visit!r}",
                f"band={self.band!r}",
                f"day_obs={self.day_obs!r}",
                f"physical_filter={self.physical_filter!r}",
            ]
        )
        # Region of the htm11 test pixel, as it appears in expected relation
        # strings that need post-query spatial filtering.
        htm11_region = self.registry.dimensions["htm11"].pixelization.pixel(self.htm11)

        # Constrain one set of regular spatial dimensions from another.
        # This needs post-query filtering in the iteration engine.
        self.assert_relation_str(
            f"""
            Π[band, patch, skymap, tract](
                σ[regions_overlap(patch.region, visit_detector_region.region)](
                    →[iteration](
                        select(
                            Π[band, patch, patch.region, skymap, tract, visit_detector_region.region](
                                σ[
                                    instrument={self.instrument!r}
                                    and detector={self.detector!r}
                                    and visit={self.visit!r}
                                    and band={self.band!r}
                                    and day_obs={self.day_obs!r}
                                    and physical_filter={self.physical_filter!r}
                                ](
                                    patch_htm7_overlap
                                    ⋈ visit_detector_region_htm7_overlap
                                    ⋈ physical_filter
                                    ⋈ patch
                                    ⋈ visit
                                    ⋈ visit_detector_region
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["patch", "band"], instrument=self.instrument, visit=self.visit, detector=self.detector
            ),
            self.registry.queryDataIds(["patch", "band"], where=where),
        )
        # Constrain the special common skypix dimension from a regular
        # dimension.  This does not need any post-query filtering.
        self.assert_relation_str(
            # It would be better if this query didn't join in visit and
            # physical_filter - it does that to ensure all implied dimension
            # relationships are satisfied in the results, but the dimensions
            # implied by visit are not present in the results and play no role
            # in the constraints.  But it'd be hard to fix that and any fix
            # would be very rarely exercised.
            f"""
            select(
                Π[htm7](
                    σ[
                        instrument={self.instrument!r}
                        and detector={self.detector!r}
                        and visit={self.visit!r}
                        and band={self.band!r}
                        and day_obs={self.day_obs!r}
                        and physical_filter={self.physical_filter!r}
                    ](
                        visit_detector_region_htm7_overlap
                        ⋈ physical_filter
                        ⋈ visit
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["htm7"], instrument=self.instrument, visit=self.visit, detector=self.detector
            ),
            # For regular dimension constraints we can also support having the
            # data ID expressed as a 'where' expression.  The query would also
            # have the same behavior with only visit and detector specified
            # in the 'where' string, but it'd change the expected string.
            self.registry.queryDataIds(["htm7"], where=where),
        )
        # We can't constrain any other skypix system spatially, because we
        # don't have overlap rows for those in the database.  But in the future
        # we might be able to fake it with an iteration-engine spatial join, or
        # utilize explicitly-materialized overlaps.
        with self.assertRaises(MissingSpatialOverlapError):
            self.registry.queryDataIds(
                ["htm11"],
                instrument=self.instrument,
                visit=self.visit,
                detector=self.detector,
            )
        # Constrain a regular spatial dimension (patch) from a non-common
        # skypix dimension.  In general this requires post-query filtering to
        # get only the patches that overlap the skypix pixel.  We could
        # special-case skypix dimensions that are coarser than the common
        # dimension and part of the same system to simplify both the SQL query
        # and avoid post-query filtering, but we don't at present.
        self.assert_relation_str(
            f"""
            Π[patch, skymap, tract](
                σ[
                    regions_overlap(
                        patch.region,
                        {htm11_region}
                    )
                ](
                    →[iteration](
                        select(
                            Π[patch, patch.region, skymap, tract](
                                σ[htm7={self.htm7!r}](
                                    patch_htm7_overlap ⋈ patch
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(["patch"], htm11=self.htm11),
        )
        # Constrain a regular spatial dimension (patch) from the common
        # skypix dimension.  This does not require post-query filtering.
        self.assert_relation_str(
            f"""
            select(
                Π[patch, skymap, tract](
                    σ[htm7={self.htm7!r}](
                        patch_htm7_overlap
                    )
                )
            )
            """,
            self.registry.queryDataIds(["patch"], htm7=self.htm7),
        )
        # Constrain a regular dimension (detector) via a different dimension
        # (visit) that combine together to define a more fine-grained region,
        # and also constrain via a skypix dimension other than the common one.
        # Once again we could special-case this for skypix dimensions that are
        # coarser than the common dimension in the same system, but we don't.
        self.assert_relation_str(
            # This query also doesn't need visit or physical_filter joined in,
            # but we can live with that.
            f"""
            Π[detector, instrument](
                σ[
                    regions_overlap(
                        visit_detector_region.region,
                        {htm11_region}
                    )
                ](
                    →[iteration](
                        select(
                            Π[detector, instrument, visit_detector_region.region](
                                σ[
                                    instrument={self.instrument!r}
                                    and visit={self.visit!r}
                                    and band={self.band!r}
                                    and day_obs={self.day_obs!r}
                                    and physical_filter={self.physical_filter!r}
                                    and htm7={self.htm7!r}
                                ](
                                    visit_detector_region_htm7_overlap
                                    ⋈ physical_filter
                                    ⋈ visit
                                    ⋈ visit_detector_region
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["detector"], visit=self.visit, instrument=self.instrument, htm11=self.htm11
            ),
        )
        # Constrain a regular dimension (detector) via a different dimension
        # (visit) that combine together to define a more fine-grained region,
        # and also constrain via the common-skypix system.
        self.assert_relation_str(
            # This query also doesn't need visit or physical_filter joined in,
            # but we can live with that.
            f"""
            select(
                Π[detector, instrument](
                    σ[
                        htm7={self.htm7!r}
                        and instrument={self.instrument!r}
                        and visit={self.visit!r}
                        and band={self.band!r}
                        and day_obs={self.day_obs!r}
                        and physical_filter={self.physical_filter!r}
                    ](
                        visit_detector_region_htm7_overlap
                        ⋈ physical_filter
                        ⋈ visit
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["detector"], visit=self.visit, instrument=self.instrument, htm7=self.htm7
            ),
        )
# Run the tests in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()