Coverage for tests/test_expressions.py: 23%
151 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-30 02:53 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <https://www.gnu.org/licenses/>.
28import datetime
29import unittest
31import astropy.time
32import sqlalchemy
33from lsst.daf.butler import (
34 ColumnTypeInfo,
35 DataCoordinate,
36 DatasetColumnTag,
37 DimensionUniverse,
38 ddl,
39 time_utils,
40)
41from lsst.daf.butler.registry.queries.expressions import make_string_expression_predicate
42from lsst.daf.butler.registry.queries.expressions.check import CheckVisitor, InspectionVisitor
43from lsst.daf.butler.registry.queries.expressions.normalForm import NormalForm, NormalFormExpression
44from lsst.daf.butler.registry.queries.expressions.parser import ParserYacc
45from lsst.daf.butler.timespan_database_representation import TimespanDatabaseRepresentation
46from lsst.daf.relation import ColumnContainer, ColumnExpression
47from sqlalchemy.schema import Column
class FakeDatasetRecordStorageManager:
    """Minimal stand-in for a dataset record storage manager.

    The class carries nothing but the ``ingestDate`` column attribute.
    """

    # SQLAlchemy column object named "ingest_date"; no type is specified.
    ingestDate = Column("ingest_date")
class ConvertExpressionToPredicateTestCase(unittest.TestCase):
    """Exercise ``make_string_expression_predicate`` on small expressions.

    The ``ingest_date_*`` class attributes select the column type, the Python
    type and the expected literal used by the ``ingest_date`` test, so the
    same tests can be rerun with a different ingest-date representation by
    overriding them in a subclass.
    """

    ingest_date_dtype = sqlalchemy.TIMESTAMP
    ingest_date_pytype = datetime.datetime
    ingest_date_literal = datetime.datetime(2020, 1, 1)

    def setUp(self):
        """Build the `ColumnTypeInfo` shared by every test."""
        dataset_id_spec = ddl.FieldSpec("dataset_id", dtype=ddl.GUID)
        run_key_spec = ddl.FieldSpec("run_id", dtype=sqlalchemy.BigInteger)
        self.column_types = ColumnTypeInfo(
            timespan_cls=TimespanDatabaseRepresentation.Compound,
            universe=DimensionUniverse(),
            dataset_id_spec=dataset_id_spec,
            run_key_spec=run_key_spec,
            ingest_date_dtype=self.ingest_date_dtype,
        )

    def _predicate(self, expression, **kwargs):
        """Convert ``expression`` against the empty dimension group.

        Extra keyword arguments are forwarded unchanged to
        ``make_string_expression_predicate``.  Only the first element of its
        result is returned, since that is what the tests compare.
        """
        converted = make_string_expression_predicate(
            expression,
            self.column_types.universe.empty,
            column_types=self.column_types,
            **kwargs,
        )
        return converted[0]

    @staticmethod
    def _int_literal(value):
        """Return an integer literal column expression for ``value``."""
        return ColumnExpression.literal(value, dtype=int)

    @classmethod
    def _gt_or_in(cls, members):
        """Return the predicate expected for ``a > b OR t in (...)``.

        The bound values are fixed at ``a=1``, ``b=2`` and ``t=0``;
        ``members`` lists, in order, the integers expected inside the
        ``IN`` container.
        """
        comparison = cls._int_literal(1).gt(cls._int_literal(2))
        container = ColumnContainer.sequence(
            [cls._int_literal(member) for member in members],
            dtype=int,
        )
        return comparison.logical_or(container.contains(cls._int_literal(0)))

    def test_simple(self):
        """Test with a trivial expression."""
        actual = self._predicate("1 > 0")
        expected = self._int_literal(1).gt(self._int_literal(0))
        self.assertEqual(actual, expected)

    def test_time(self):
        """Test with a trivial expression including times."""
        converter = time_utils.TimeConverter()
        actual = self._predicate("T'1970-01-01 00:00/tai' < T'2020-01-01 00:00/tai'")
        # Nanosecond counts expected for the two time literals above.
        earlier = ColumnExpression.literal(converter.nsec_to_astropy(0), dtype=astropy.time.Time)
        later = ColumnExpression.literal(
            converter.nsec_to_astropy(1577836800000000000), dtype=astropy.time.Time
        )
        self.assertEqual(actual, earlier.lt(later))

    def test_ingest_date(self):
        """Test with an expression including ingest_date which is native
        UTC.
        """
        actual = self._predicate(
            "ingest_date < T'2020-01-01 00:00/utc'",
            dataset_type_name="fake",
        )
        column = ColumnExpression.reference(
            DatasetColumnTag("fake", "ingest_date"), dtype=self.ingest_date_pytype
        )
        bound = ColumnExpression.literal(self.ingest_date_literal, dtype=self.ingest_date_pytype)
        self.assertEqual(actual, column.lt(bound))

    def test_bind(self):
        """Test with bind parameters."""
        actual = self._predicate(
            "a > b OR t in (x, y, z)",
            bind={"a": 1, "b": 2, "t": 0, "x": 10, "y": 20, "z": 30},
        )
        self.assertEqual(actual, self._gt_or_in([10, 20, 30]))

    def test_bind_list(self):
        """Test with bind parameter which is list/tuple/set inside IN rhs."""
        # Each case is (expression, bind mapping, expected IN members).
        # The first binds a single tuple; the rest bind a couple of
        # variables inside IN() with different combinations of scalars
        # and collections.
        cases = (
            (
                "a > b OR t in (x)",
                {"a": 1, "b": 2, "t": 0, "x": (10, 20, 30)},
                [10, 20, 30],
            ),
            (
                "a > b OR t in (x, y)",
                {"a": 1, "b": 2, "t": 0, "x": 10, "y": 20},
                [10, 20],
            ),
            (
                "a > b OR t in (x, y)",
                {"a": 1, "b": 2, "t": 0, "x": [10, 30], "y": 20},
                [10, 30, 20],
            ),
            (
                "a > b OR t in (x, y)",
                {"a": 1, "b": 2, "t": 0, "x": (10, 30), "y": {20}},
                [10, 30, 20],
            ),
        )
        for expression, bind, members in cases:
            actual = self._predicate(expression, bind=bind)
            self.assertEqual(actual, self._gt_or_in(members))
class ConvertExpressionToPredicateTestCaseAstropy(ConvertExpressionToPredicateTestCase):
    """Rerun the ``make_string_expression_predicate`` tests with
    ingest_date defined as nanoseconds.
    """

    # Only the ingest-date representation differs from the parent class.
    ingest_date_dtype = ddl.AstropyTimeNsecTai
    ingest_date_pytype = astropy.time.Time
    ingest_date_literal = astropy.time.Time(
        datetime.datetime(year=2020, month=1, day=1),
        scale="utc",
    )
class InspectionVisitorTestCase(unittest.TestCase):
    """Tests for InspectionVisitor class.

    Each test parses an expression with `ParserYacc`, visits the resulting
    tree with an `InspectionVisitor` and checks the attributes of the
    returned summary.
    """

    def test_simple(self):
        """Test for simple expressions"""
        universe = DimensionUniverse()
        parser = ParserYacc()

        # A single equality on a dimension is expected to yield a data ID
        # key and value.
        tree = parser.parse("instrument = 'LSST'")
        bind = {}
        summary = tree.visit(InspectionVisitor(universe, bind))
        self.assertEqual(summary.dimensions, {"instrument"})
        self.assertFalse(summary.columns)
        self.assertFalse(summary.hasIngestDate)
        self.assertEqual(summary.dataIdKey, universe["instrument"])
        self.assertEqual(summary.dataIdValue, "LSST")

        # An inequality is expected to leave the data ID key/value unset.
        tree = parser.parse("instrument != 'LSST'")
        summary = tree.visit(InspectionVisitor(universe, bind))
        self.assertEqual(summary.dimensions, {"instrument"})
        self.assertFalse(summary.columns)
        self.assertIsNone(summary.dataIdKey)
        self.assertIsNone(summary.dataIdValue)

        # A conjunction of constraints is expected to leave the data ID
        # key/value unset as well.
        tree = parser.parse("instrument = 'LSST' AND visit = 1")
        summary = tree.visit(InspectionVisitor(universe, bind))
        self.assertEqual(summary.dimensions, {"instrument", "visit", "band", "physical_filter", "day_obs"})
        self.assertFalse(summary.columns)
        self.assertIsNone(summary.dataIdKey)
        self.assertIsNone(summary.dataIdValue)

        tree = parser.parse("instrument = 'LSST' AND visit = 1 AND skymap = 'x'")
        summary = tree.visit(InspectionVisitor(universe, bind))
        self.assertEqual(
            summary.dimensions, {"instrument", "visit", "band", "physical_filter", "skymap", "day_obs"}
        )
        self.assertFalse(summary.columns)
        self.assertIsNone(summary.dataIdKey)
        self.assertIsNone(summary.dataIdValue)

    def test_bind(self):
        """Test for simple expressions with binds."""
        universe = DimensionUniverse()
        parser = ParserYacc()

        # Equality against a bound identifier is expected to resolve to the
        # bound value.
        tree = parser.parse("instrument = instr")
        bind = {"instr": "LSST"}
        summary = tree.visit(InspectionVisitor(universe, bind))
        self.assertEqual(summary.dimensions, {"instrument"})
        self.assertFalse(summary.hasIngestDate)
        self.assertEqual(summary.dataIdKey, universe["instrument"])
        self.assertEqual(summary.dataIdValue, "LSST")

        # The tree must be visited before any assertion on ``summary``;
        # otherwise the check would look at the previous expression's
        # summary.
        tree = parser.parse("instrument != instr")
        summary = tree.visit(InspectionVisitor(universe, bind))
        self.assertEqual(summary.dimensions, {"instrument"})
        self.assertIsNone(summary.dataIdKey)
        self.assertIsNone(summary.dataIdValue)

        tree = parser.parse("instrument = instr AND visit = visit_id")
        bind = {"instr": "LSST", "visit_id": 1}
        summary = tree.visit(InspectionVisitor(universe, bind))
        self.assertEqual(summary.dimensions, {"instrument", "visit", "band", "physical_filter", "day_obs"})
        self.assertIsNone(summary.dataIdKey)
        self.assertIsNone(summary.dataIdValue)

        # Mix of literal values and one bound identifier.
        tree = parser.parse("instrument = 'LSST' AND visit = 1 AND skymap = skymap_name")
        bind = {"instr": "LSST", "visit_id": 1, "skymap_name": "x"}
        summary = tree.visit(InspectionVisitor(universe, bind))
        self.assertEqual(
            summary.dimensions, {"instrument", "visit", "band", "physical_filter", "skymap", "day_obs"}
        )
        self.assertIsNone(summary.dataIdKey)
        self.assertIsNone(summary.dataIdValue)

    def test_in(self):
        """Test for IN expressions."""
        universe = DimensionUniverse()
        parser = ParserYacc()

        tree = parser.parse("instrument IN ('LSST')")
        bind = {}
        summary = tree.visit(InspectionVisitor(universe, bind))
        self.assertEqual(summary.dimensions, {"instrument"})
        self.assertFalse(summary.hasIngestDate)
        # we do not handle IN with a single item as `=`
        self.assertIsNone(summary.dataIdKey)
        self.assertIsNone(summary.dataIdValue)

        # Same expectation when the single item is a bound identifier.
        tree = parser.parse("instrument IN (instr)")
        bind = {"instr": "LSST"}
        summary = tree.visit(InspectionVisitor(universe, bind))
        self.assertEqual(summary.dimensions, {"instrument"})
        self.assertIsNone(summary.dataIdKey)
        self.assertIsNone(summary.dataIdValue)

        # IN with several literal values.
        tree = parser.parse("visit IN (1,2,3)")
        bind = {}
        summary = tree.visit(InspectionVisitor(universe, bind))
        self.assertEqual(summary.dimensions, {"instrument", "visit", "band", "physical_filter", "day_obs"})
        self.assertIsNone(summary.dataIdKey)
        self.assertIsNone(summary.dataIdValue)

        # IN with several scalar bound identifiers.
        tree = parser.parse("visit IN (visit1, visit2, visit3)")
        bind = {"visit1": 1, "visit2": 2, "visit3": 3}
        summary = tree.visit(InspectionVisitor(universe, bind))
        self.assertEqual(summary.dimensions, {"instrument", "visit", "band", "physical_filter", "day_obs"})
        self.assertIsNone(summary.dataIdKey)
        self.assertIsNone(summary.dataIdValue)

        # IN with one bound identifier holding a tuple of values.
        tree = parser.parse("visit IN (visits)")
        bind = {"visits": (1, 2, 3)}
        summary = tree.visit(InspectionVisitor(universe, bind))
        self.assertEqual(summary.dimensions, {"instrument", "visit", "band", "physical_filter", "day_obs"})
        self.assertIsNone(summary.dataIdKey)
        self.assertIsNone(summary.dataIdValue)
class CheckVisitorTestCase(unittest.TestCase):
    """Exercise the CheckVisitor class on parsed expressions."""

    def test_governor(self):
        """Visit expressions that constrain a governor dimension.

        Nothing is asserted about the visit results; the test passes as
        long as visiting each expression does not raise.
        """
        parser = ParserYacc()

        universe = DimensionUniverse()
        dimensions = universe.conform(("instrument", "visit"))
        empty_data_id = DataCoordinate.make_empty(universe)
        empty_defaults = DataCoordinate.make_empty(universe)

        # Each case is (expression, bind mapping): a governor-only
        # constraint, the same constraint with its operands swapped, and a
        # constraint that uses a bind for the governor value.
        cases = (
            ("instrument = 'LSST'", {}),
            ("'LSST' = instrument", {}),
            ("instrument = instr", {"instr": "LSST"}),
        )
        for expression, binds in cases:
            tree = parser.parse(expression)
            normal_form = NormalFormExpression.fromTree(tree, NormalForm.DISJUNCTIVE)
            checker = CheckVisitor(empty_data_id, dimensions, binds, empty_defaults)
            normal_form.visit(checker)
# Run the test cases in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()