Coverage for tests/test_query_relations.py: 26%
51 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-25 10:50 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-25 10:50 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28import difflib
29import os
30import os.path
31import re
32import unittest
34from lsst.daf.butler import (
35 DataCoordinateQueryResults,
36 DimensionRecordQueryResults,
37 SingleTypeDatasetQueryResults,
38)
39from lsst.daf.butler.registry import MissingSpatialOverlapError, RegistryConfig, _RegistryFactory
40from lsst.daf.butler.transfers import YamlRepoImportBackend
# Absolute path of the directory that contains this test module; the registry
# test data file is located relative to it.
TESTDIR = os.path.dirname(os.path.abspath(__file__))
class TestQueryRelationsTests(unittest.TestCase):
    """Tests for registry queries that check that the generated relation tree
    matches expectations.

    These tests are somewhat fragile - there are multiple valid relation trees
    for most registry queries, just as there are multiple valid SQL queries,
    and since we check the relation tree via string comparisons we are
    also sensitive to irrelevant things like column ordering.  But these
    differences are deterministic, and checking the relation trees instead of
    the query results puts a much smaller burden on test-data creation and
    inspection (as well as making tests go faster), making it much easier to
    test many combinations of arguments.

    Note that daf_relation provides good test coverage of the process of going
    from relation trees to SQL.
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Create an in-memory SQLite registry, load the test data into it,
        and record one mutually-overlapping value for each dimension used by
        the tests.
        """
        config = RegistryConfig()
        config["db"] = "sqlite://"
        cls.registry = _RegistryFactory(config).create_from_config()
        # We need just enough test data to have valid dimension records for
        # all of the dimensions we're concerned with, and we want to pick
        # values for each dimension that correspond to a spatiotemporal
        # overlap.  Without that, we'd be fighting optimizations built into the
        # query system that simplify things as soon as it can spot that there
        # will be no overall results.
        data_file = os.path.normpath(os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Use an explicit encoding so that reading the YAML export does not
        # depend on the platform's locale default.
        with open(data_file, encoding="utf-8") as stream:
            backend = YamlRepoImportBackend(stream, cls.registry)
            backend.register()
            backend.load(datastore=None)
        assert (
            cls.registry.dimensions.commonSkyPix.name == "htm7"
        ), "If this changes, update the skypix levels below to have one below and one above."
        cls.htm7 = 222340
        cls.htm11 = 56919188
        cls.instrument = "HSC"
        cls.skymap = "hsc_rings_v1"
        cls.visit = 404
        cls.tract = 9615
        cls.detector = 0
        cls.patch = 14
        cls.data_id = cls.registry.expandDataId(
            htm7=cls.htm7,
            htm11=cls.htm11,
            instrument=cls.instrument,
            skymap=cls.skymap,
            visit=cls.visit,
            tract=cls.tract,
            detector=cls.detector,
            patch=cls.patch,
        )
        # band and physical_filter are not given above; they are read back
        # from the expanded data ID.
        cls.band = cls.data_id["band"]
        cls.physical_filter = cls.data_id["physical_filter"]

    def assert_relation_str(
        self,
        expected: str,
        *results: DataCoordinateQueryResults | DimensionRecordQueryResults | SingleTypeDatasetQueryResults,
    ) -> None:
        """Assert that one or more registry queries have relation trees that
        match the given string.

        Parameters
        ----------
        expected : `str`
            Expected relation tree, corresponding to
            ``lsst.daf.relation.Relation.__str__`` (which is much more concise
            and readable than the `repr` version, once you get used to it).
            Any newlines and indentation will be stripped.
        *results
            Query result objects returned by queryDataIds,
            queryDimensionRecords, or queryDatasets.

        Raises
        ------
        AssertionError
            Raised for the first query whose relation string differs from
            ``expected``; the message includes a `difflib` comparison of the
            two strings.
        """
        # Drop newlines and leading/trailing space.
        expected = expected.replace("\n", " ").strip()
        # Drop duplicate spaces (i.e. indentation).
        expected = re.sub(r" \s+", " ", expected)
        # Drop spaces next to parentheses and square brackets.
        expected = re.sub(r"\s*(\[|\(|\)|\])\s*", r"\1", expected)
        differ = difflib.Differ()
        for n, result in enumerate(results):
            result_str = str(result._query.relation)
            if expected == result_str:
                continue
            message_lines = [f"Unexpected relation string for query {n}:"]
            message_lines.extend(differ.compare([expected], [result_str]))
            raise AssertionError("\n".join(message_lines))

    def test_spatial_constraints(self) -> None:
        """Test query constraints from data IDs and WHERE clauses that imply a
        spatial region.
        """
        # The same constraint as the instrument/visit/detector keyword
        # arguments used below, spelled out as a 'where' expression.  Joining
        # the terms guarantees whitespace on both sides of every 'and'.
        data_id_where = " and ".join(
            [
                f"instrument={self.instrument!r}",
                f"detector={self.detector!r}",
                f"visit={self.visit!r}",
                f"band={self.band!r}",
                f"physical_filter={self.physical_filter!r}",
            ]
        )
        # Constrain one set of regular spatial dimensions from another.
        # This needs post-query filtering in the iteration engine.
        self.assert_relation_str(
            f"""
            Π[band, patch, skymap, tract](
                σ[regions_overlap(patch.region, visit_detector_region.region)](
                    →[iteration](
                        select(
                            Π[band, patch, patch.region, skymap, tract, visit_detector_region.region](
                                σ[
                                    instrument={self.instrument!r}
                                    and detector={self.detector!r}
                                    and visit={self.visit!r}
                                    and band={self.band!r}
                                    and physical_filter={self.physical_filter!r}
                                ](
                                    patch_htm7_overlap
                                    ⋈ visit_detector_region_htm7_overlap
                                    ⋈ physical_filter
                                    ⋈ patch
                                    ⋈ visit
                                    ⋈ visit_detector_region
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["patch", "band"], instrument=self.instrument, visit=self.visit, detector=self.detector
            ),
            self.registry.queryDataIds(["patch", "band"], where=data_id_where),
        )
        # Constrain the special common skypix dimension from a regular
        # dimension.  This does not need any post-query filtering.
        self.assert_relation_str(
            # It would be better if this query didn't join in visit and
            # physical_filter - it does that to ensure all implied dimension
            # relationships are satisfied in the results, but the dimensions
            # implied by visit are not present in the results and play no role
            # in the constraints.  But it'd be hard to fix that and any fix
            # would be very rarely exercised.
            f"""
            select(
                Π[htm7](
                    σ[
                        instrument={self.instrument!r}
                        and detector={self.detector!r}
                        and visit={self.visit!r}
                        and band={self.band!r}
                        and physical_filter={self.physical_filter!r}
                    ](
                        visit_detector_region_htm7_overlap
                        ⋈ physical_filter
                        ⋈ visit
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["htm7"], instrument=self.instrument, visit=self.visit, detector=self.detector
            ),
            # For regular dimension constraints we can also support having the
            # data ID expressed as a 'where' expression.  The query would also
            # have the same behavior with only visit and detector specified
            # in the 'where' string, but it'd change the expected string.
            self.registry.queryDataIds(["htm7"], where=data_id_where),
        )
        # We can't constrain any other skypix system spatially, because we
        # don't have overlap rows for those in the database.  But in the future
        # we might be able to fake it with an iteration-engine spatial join, or
        # utilize explicitly-materialized overlaps.
        with self.assertRaises(MissingSpatialOverlapError):
            self.registry.queryDataIds(
                ["htm11"],
                instrument=self.instrument,
                visit=self.visit,
                detector=self.detector,
            )
        # Region of the htm11 pixel, as it is embedded in the post-query
        # regions_overlap predicates of the expected relation strings below.
        htm11_region = self.registry.dimensions["htm11"].pixelization.pixel(self.htm11)
        # Constrain a regular spatial dimension (patch) from a non-common
        # skypix dimension.  In general this requires post-query filtering to
        # get only the patches that overlap the skypix pixel.  We could
        # special-case skypix dimensions that are coarser than the common
        # dimension and part of the same system to simplify both the SQL query
        # and avoid post-query filtering, but we don't at present.
        self.assert_relation_str(
            f"""
            Π[patch, skymap, tract](
                σ[
                    regions_overlap(
                        patch.region,
                        {htm11_region}
                    )
                ](
                    →[iteration](
                        select(
                            Π[patch, patch.region, skymap, tract](
                                σ[htm7={self.htm7!r}](
                                    patch_htm7_overlap ⋈ patch
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(["patch"], htm11=self.htm11),
        )
        # Constrain a regular spatial dimension (patch) from the common
        # skypix dimension.  This does not require post-query filtering.
        self.assert_relation_str(
            f"""
            select(
                Π[patch, skymap, tract](
                    σ[htm7={self.htm7!r}](
                        patch_htm7_overlap
                    )
                )
            )
            """,
            self.registry.queryDataIds(["patch"], htm7=self.htm7),
        )
        # Constrain a regular dimension (detector) via a different dimension
        # (visit) that combine together to define a more fine-grained region,
        # and also constrain via a skypix dimension other than the common one.
        # Once again we could special-case this for skypix dimensions that are
        # coarser than the common dimension in the same system, but we don't.
        self.assert_relation_str(
            # This query also doesn't need visit or physical_filter joined in,
            # but we can live with that.
            f"""
            Π[detector, instrument](
                σ[
                    regions_overlap(
                        visit_detector_region.region,
                        {htm11_region}
                    )
                ](
                    →[iteration](
                        select(
                            Π[detector, instrument, visit_detector_region.region](
                                σ[
                                    instrument={self.instrument!r}
                                    and visit={self.visit!r}
                                    and band={self.band!r}
                                    and physical_filter={self.physical_filter!r}
                                    and htm7={self.htm7!r}
                                ](
                                    visit_detector_region_htm7_overlap
                                    ⋈ physical_filter
                                    ⋈ visit
                                    ⋈ visit_detector_region
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["detector"], visit=self.visit, instrument=self.instrument, htm11=self.htm11
            ),
        )
        # Constrain a regular dimension (detector) via a different dimension
        # (visit) that combine together to define a more fine-grained region,
        # and also constrain via the common-skypix system.
        self.assert_relation_str(
            # This query also doesn't need visit or physical_filter joined in,
            # but we can live with that.
            f"""
            select(
                Π[detector, instrument](
                    σ[
                        htm7={self.htm7!r}
                        and instrument={self.instrument!r}
                        and visit={self.visit!r}
                        and band={self.band!r}
                        and physical_filter={self.physical_filter!r}
                    ](
                        visit_detector_region_htm7_overlap
                        ⋈ physical_filter
                        ⋈ visit
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["detector"], visit=self.visit, instrument=self.instrument, htm7=self.htm7
            ),
        )
# Run this module's tests through the unittest runner when it is executed
# directly as a script.
if __name__ == "__main__":
    unittest.main()