Coverage for tests/test_query_relations.py: 25%
50 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-27 09:44 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28import difflib
29import os
30import os.path
31import re
32import unittest
34from lsst.daf.butler.registry import MissingSpatialOverlapError, RegistryConfig, _RegistryFactory, queries
35from lsst.daf.butler.transfers import YamlRepoImportBackend
# Absolute path of the directory containing this test module; test data files
# are located relative to it.
_THIS_FILE = os.path.abspath(__file__)
TESTDIR = os.path.dirname(_THIS_FILE)
class TestQueryRelationsTests(unittest.TestCase):
    """Tests for registry queries that check that the generated relation tree
    matches expectations.

    These tests are somewhat fragile - there are multiple valid relation trees
    for most registry queries, just as there are multiple valid SQL queries,
    and since we check the relation tree via string comparisons we are
    also sensitive to irrelevant things like column ordering.  But these
    differences are deterministic, and checking the relation trees instead of
    the query results puts a much smaller burden on test-data creation and
    inspection (as well as making tests go faster), making it much easier to
    test many combinations of arguments.

    Note that daf_relation provides good test coverage of the process of going
    from relation trees to SQL.
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Create an in-memory SQLite registry, load the test data into it,
        and record the dimension values shared by all tests.
        """
        config = RegistryConfig()
        config["db"] = "sqlite://"
        cls.registry = _RegistryFactory(config).create_from_config()
        # We need just enough test data to have valid dimension records for
        # all of the dimensions we're concerned with, and we want to pick
        # values for each dimension that correspond to a spatiotemporal
        # overlap.  Without that, we'd be fighting optimizations built into the
        # query system that simplify things as soon as it can spot that there
        # will be no overall results.
        data_file = os.path.normpath(os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Be explicit about the encoding so that reading the YAML export does
        # not depend on the locale of the machine running the tests.
        with open(data_file, encoding="utf-8") as stream:
            backend = YamlRepoImportBackend(stream, cls.registry)
        backend.register()
        backend.load(datastore=None)
        assert (
            cls.registry.dimensions.commonSkyPix.name == "htm7"
        ), "If this changes, update the skypix levels below to have one below and one above."
        cls.htm7 = 222340
        cls.htm11 = 56919188
        cls.instrument = "HSC"
        cls.skymap = "hsc_rings_v1"
        cls.visit = 404
        cls.tract = 9615
        cls.detector = 0
        cls.patch = 14
        cls.data_id = cls.registry.expandDataId(
            htm7=cls.htm7,
            htm11=cls.htm11,
            instrument=cls.instrument,
            skymap=cls.skymap,
            visit=cls.visit,
            tract=cls.tract,
            detector=cls.detector,
            patch=cls.patch,
        )
        # These are implied by the visit; look them up from the expanded data
        # ID rather than hard-coding them.
        cls.band = cls.data_id["band"]
        cls.physical_filter = cls.data_id["physical_filter"]

    def assert_relation_str(
        self,
        expected: str,
        *results: queries.DataCoordinateQueryResults
        | queries.DimensionRecordQueryResults
        | queries.ParentDatasetQueryResults,
    ) -> None:
        """Assert that one or more registry queries have relation trees that
        match the given string.

        Parameters
        ----------
        expected : `str`
            Expected relation tree, corresponding to
            ``lsst.daf.relation.Relation.__str__`` (which is much more concise
            and readable than the `repr` version, once you get used to it).
            Any newlines and indentation will be stripped.
        *results
            Query result objects returned by queryDataIds,
            queryDimensionRecords, or queryDatasets.

        Raises
        ------
        AssertionError
            Raised for the first query whose relation string differs from
            ``expected``; the message includes a `difflib` comparison of the
            two strings.
        """
        # Drop newlines and leading/trailing space.
        expected = expected.replace("\n", " ").strip()
        # Drop duplicate spaces (i.e. indentation).
        expected = re.sub(r" \s+", " ", expected)
        # Drop spaces next to parentheses and square brackets.
        expected = re.sub(r"\s*(\[|\(|\)|\])\s*", r"\1", expected)
        differ = difflib.Differ()
        for n, result in enumerate(results):
            result_str = str(result._query.relation)
            if expected != result_str:
                message_lines = [f"Unexpected relation string for query {n}:"]
                message_lines.extend(differ.compare([expected], [result_str]))
                # Report through the TestCase machinery so the failure uses
                # this test case's ``failureException``.
                self.fail("\n".join(message_lines))

    def test_spatial_constraints(self) -> None:
        """Test query constraints from data IDs and WHERE clauses that imply a
        spatial region.
        """
        # The visit+detector data ID expressed as a 'where' expression.  It is
        # used below alongside the equivalent keyword-argument data ID, and
        # both forms are expected to produce the same relation tree.
        data_id_where = (
            f"band={self.band!r} "
            f"and instrument={self.instrument!r} "
            f"and detector={self.detector!r} "
            f"and physical_filter={self.physical_filter!r} "
            f"and visit={self.visit!r}"
        )
        # Constrain one set of regular spatial dimensions from another.
        # This needs post-query filtering in the iteration engine.
        self.assert_relation_str(
            f"""
            Π[band, patch, skymap, tract](
                σ[regions_overlap(patch.region, visit_detector_region.region)](
                    →[iteration](
                        select(
                            Π[band, patch, patch.region, skymap, tract, visit_detector_region.region](
                                σ[
                                    band={self.band!r}
                                    and instrument={self.instrument!r}
                                    and detector={self.detector!r}
                                    and physical_filter={self.physical_filter!r}
                                    and visit={self.visit!r}
                                ](
                                    patch_htm7_overlap
                                    ⋈ visit_detector_region_htm7_overlap
                                    ⋈ physical_filter
                                    ⋈ patch
                                    ⋈ visit
                                    ⋈ visit_detector_region
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["patch", "band"], instrument=self.instrument, visit=self.visit, detector=self.detector
            ),
            self.registry.queryDataIds(["patch", "band"], where=data_id_where),
        )
        # Constrain the special common skypix dimension from a regular
        # dimension.  This does not need any post-query filtering.
        self.assert_relation_str(
            # It would be better if this query didn't join in visit and
            # physical_filter - it does that to ensure all implied dimension
            # relationships are satisfied in the results, but the dimensions
            # implied by visit are not present in the results and play no role
            # in the constraints.  But it'd be hard to fix that and any fix
            # would be very rarely exercised.
            f"""
            select(
                Π[htm7](
                    σ[
                        band={self.band!r}
                        and instrument={self.instrument!r}
                        and detector={self.detector!r}
                        and physical_filter={self.physical_filter!r}
                        and visit={self.visit!r}
                    ](
                        visit_detector_region_htm7_overlap
                        ⋈ physical_filter
                        ⋈ visit
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["htm7"], instrument=self.instrument, visit=self.visit, detector=self.detector
            ),
            # For regular dimension constraints we can also support having the
            # data ID expressed as a 'where' expression.  The query would also
            # have the same behavior with only visit and detector specified
            # in the 'where' string, but it'd change the expected string.
            self.registry.queryDataIds(["htm7"], where=data_id_where),
        )
        # We can't constrain any other skypix system spatially, because we
        # don't have overlap rows for those in the database.  But in the future
        # we might be able to fake it with an iteration-engine spatial join, or
        # utilize explicitly-materialized overlaps.
        with self.assertRaises(MissingSpatialOverlapError):
            self.registry.queryDataIds(
                ["htm11"],
                instrument=self.instrument,
                visit=self.visit,
                detector=self.detector,
            )
        # Region of the htm11 pixel used in the non-common-skypix constraints
        # below; it appears in the expected post-query overlap filters.
        htm11_region = self.registry.dimensions["htm11"].pixelization.pixel(self.htm11)
        # Constrain a regular spatial dimension (patch) from a non-common
        # skypix dimension.  In general this requires post-query filtering to
        # get only the patches that overlap the skypix pixel.  We could
        # special-case skypix dimensions that are coarser than the common
        # dimension and part of the same system to simplify both the SQL query
        # and avoid post-query filtering, but we don't at present.
        self.assert_relation_str(
            f"""
            Π[patch, skymap, tract](
                σ[
                    regions_overlap(
                        patch.region,
                        {htm11_region}
                    )
                ](
                    →[iteration](
                        select(
                            Π[patch, patch.region, skymap, tract](
                                σ[htm7={self.htm7!r}](
                                    patch_htm7_overlap ⋈ patch
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(["patch"], htm11=self.htm11),
        )
        # Constrain a regular spatial dimension (patch) from the common
        # skypix dimension.  This does not require post-query filtering.
        self.assert_relation_str(
            f"""
            select(
                Π[patch, skymap, tract](
                    σ[htm7={self.htm7!r}](
                        patch_htm7_overlap
                    )
                )
            )
            """,
            self.registry.queryDataIds(["patch"], htm7=self.htm7),
        )
        # Constrain a regular dimension (detector) via a different dimension
        # (visit) that combine together to define a more fine-grained region,
        # and also constrain via a skypix dimension other than the common one.
        # Once again we could special-case this for skypix dimensions that are
        # coarser than the common dimension in the same system, but we don't.
        self.assert_relation_str(
            # This query also doesn't need visit or physical_filter joined in,
            # but we can live with that.
            f"""
            Π[detector, instrument](
                σ[
                    regions_overlap(
                        visit_detector_region.region,
                        {htm11_region}
                    )
                ](
                    →[iteration](
                        select(
                            Π[detector, instrument, visit_detector_region.region](
                                σ[
                                    band={self.band!r}
                                    and instrument={self.instrument!r}
                                    and physical_filter={self.physical_filter!r}
                                    and visit={self.visit!r}
                                    and htm7={self.htm7!r}
                                ](
                                    visit_detector_region_htm7_overlap
                                    ⋈ physical_filter
                                    ⋈ visit
                                    ⋈ visit_detector_region
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["detector"], visit=self.visit, instrument=self.instrument, htm11=self.htm11
            ),
        )
        # Constrain a regular dimension (detector) via a different dimension
        # (visit) that combine together to define a more fine-grained region,
        # and also constrain via the common-skypix system.
        self.assert_relation_str(
            # This query also doesn't need visit or physical_filter joined in,
            # but we can live with that.
            f"""
            select(
                Π[detector, instrument](
                    σ[
                        band={self.band!r}
                        and htm7={self.htm7!r}
                        and instrument={self.instrument!r}
                        and physical_filter={self.physical_filter!r}
                        and visit={self.visit!r}
                    ](
                        visit_detector_region_htm7_overlap
                        ⋈ physical_filter
                        ⋈ visit
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["detector"], visit=self.visit, instrument=self.instrument, htm7=self.htm7
            ),
        )
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()