Coverage for tests/test_expressions.py: 22%

150 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-06 10:53 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <https://www.gnu.org/licenses/>. 

27 

28import datetime 

29import unittest 

30 

31import astropy.time 

32import sqlalchemy 

33from lsst.daf.butler import ( 

34 ColumnTypeInfo, 

35 DataCoordinate, 

36 DatasetColumnTag, 

37 DimensionUniverse, 

38 TimespanDatabaseRepresentation, 

39 ddl, 

40 time_utils, 

41) 

42from lsst.daf.butler.registry.queries.expressions import make_string_expression_predicate 

43from lsst.daf.butler.registry.queries.expressions.check import CheckVisitor, InspectionVisitor 

44from lsst.daf.butler.registry.queries.expressions.normalForm import NormalForm, NormalFormExpression 

45from lsst.daf.butler.registry.queries.expressions.parser import ParserYacc 

46from lsst.daf.relation import ColumnContainer, ColumnExpression 

47from sqlalchemy.schema import Column 

48 

49 

class FakeDatasetRecordStorageManager:
    """Stand-in for a dataset record storage manager.

    The class carries a single attribute, ``ingestDate``, which is a
    SQLAlchemy ``Column`` named ``"ingest_date"``.
    """

    ingestDate = Column("ingest_date")

54 

55 

class ConvertExpressionToPredicateTestCase(unittest.TestCase):
    """Exercise make_string_expression_predicate on small expressions.

    Each test compares the first element of the function's return value with
    a hand-built ``ColumnExpression``.  The ``ingest_date_*`` class attributes
    select the ingest-date representation, so a subclass can repeat the same
    tests with a different one by overriding them.
    """

    # Column type passed to ColumnTypeInfo for the ingest_date column.
    ingest_date_dtype = sqlalchemy.TIMESTAMP
    # Python type expected on both sides of an ingest_date comparison.
    ingest_date_pytype = datetime.datetime
    # Expected literal for T'2020-01-01 00:00/utc' in test_ingest_date.
    ingest_date_literal = datetime.datetime(2020, 1, 1)

    def setUp(self):
        """Create the ColumnTypeInfo used by every test."""
        universe = DimensionUniverse()
        dataset_id_spec = ddl.FieldSpec("dataset_id", dtype=ddl.GUID)
        run_key_spec = ddl.FieldSpec("run_id", dtype=sqlalchemy.BigInteger)
        self.column_types = ColumnTypeInfo(
            timespan_cls=TimespanDatabaseRepresentation.Compound,
            universe=universe,
            dataset_id_spec=dataset_id_spec,
            run_key_spec=run_key_spec,
            ingest_date_dtype=self.ingest_date_dtype,
        )

    def _predicate(self, expression, **kwargs):
        """Return element 0 of make_string_expression_predicate's result.

        The expression is evaluated against the empty dimension set of the
        test universe; extra keyword arguments (``bind``,
        ``dataset_type_name``) are forwarded unchanged.
        """
        result = make_string_expression_predicate(
            expression,
            self.column_types.universe.empty,
            column_types=self.column_types,
            **kwargs,
        )
        return result[0]

    @staticmethod
    def _int(value):
        """Return an integer literal column expression for ``value``."""
        return ColumnExpression.literal(value, dtype=int)

    def _a_gt_b_or_t_in(self, members):
        """Return the expected predicate for ``a > b OR t in (...)``.

        Assumes the bind values a=1, b=2 and t=0 used throughout the tests;
        ``members`` are the integers expected inside the IN container, in
        order.
        """
        container = ColumnContainer.sequence([self._int(member) for member in members], dtype=int)
        comparison = self._int(1).gt(self._int(2))
        return comparison.logical_or(container.contains(self._int(0)))

    def test_simple(self):
        """Test with a trivial expression"""
        actual = self._predicate("1 > 0")
        expected = self._int(1).gt(self._int(0))
        self.assertEqual(actual, expected)

    def test_time(self):
        """Test with a trivial expression including times"""
        converter = time_utils.TimeConverter()
        actual = self._predicate("T'1970-01-01 00:00/tai' < T'2020-01-01 00:00/tai'")
        earlier = ColumnExpression.literal(converter.nsec_to_astropy(0), dtype=astropy.time.Time)
        later = ColumnExpression.literal(
            converter.nsec_to_astropy(1577836800000000000), dtype=astropy.time.Time
        )
        self.assertEqual(actual, earlier.lt(later))

    def test_ingest_date(self):
        """Test with an expression including ingest_date which is native UTC"""
        actual = self._predicate(
            "ingest_date < T'2020-01-01 00:00/utc'",
            dataset_type_name="fake",
        )
        column = ColumnExpression.reference(
            DatasetColumnTag("fake", "ingest_date"), dtype=self.ingest_date_pytype
        )
        limit = ColumnExpression.literal(self.ingest_date_literal, dtype=self.ingest_date_pytype)
        self.assertEqual(actual, column.lt(limit))

    def test_bind(self):
        """Test with bind parameters"""
        actual = self._predicate(
            "a > b OR t in (x, y, z)",
            bind={"a": 1, "b": 2, "t": 0, "x": 10, "y": 20, "z": 30},
        )
        self.assertEqual(actual, self._a_gt_b_or_t_in([10, 20, 30]))

    def test_bind_list(self):
        """Test with bind parameter which is list/tuple/set inside IN rhs."""
        cases = (
            # A single bound tuple supplies the whole IN list.
            ("a > b OR t in (x)", {"a": 1, "b": 2, "t": 0, "x": (10, 20, 30)}, [10, 20, 30]),
            # Couple of bound variables inside IN() with different
            # combinations of scalars and list.
            ("a > b OR t in (x, y)", {"a": 1, "b": 2, "t": 0, "x": 10, "y": 20}, [10, 20]),
            ("a > b OR t in (x, y)", {"a": 1, "b": 2, "t": 0, "x": [10, 30], "y": 20}, [10, 30, 20]),
            ("a > b OR t in (x, y)", {"a": 1, "b": 2, "t": 0, "x": (10, 30), "y": {20}}, [10, 30, 20]),
        )
        # Plain loop (no subTest) so the first failure stops the test.
        for expression, bind, members in cases:
            actual = self._predicate(expression, bind=bind)
            self.assertEqual(actual, self._a_gt_b_or_t_in(members))

223 

224 

class ConvertExpressionToPredicateTestCaseAstropy(ConvertExpressionToPredicateTestCase):
    """Repeat the make_string_expression_predicate tests with ingest_date
    defined as nanoseconds.

    Only the three ``ingest_date_*`` class attributes are overridden; every
    test method is inherited from the parent test case.
    """

    # Overrides read by the inherited setUp and test_ingest_date.
    ingest_date_dtype = ddl.AstropyTimeNsecTai
    ingest_date_pytype = astropy.time.Time
    ingest_date_literal = astropy.time.Time(datetime.datetime(2020, 1, 1), scale="utc")

233 

234 

class InspectionVisitorTestCase(unittest.TestCase):
    """Tests for InspectionVisitor class.

    Each test parses an expression, visits the tree with an
    ``InspectionVisitor`` and checks the resulting summary.  The tests expect
    ``dataIdKey``/``dataIdValue`` to be set only for a lone ``=`` constraint,
    and to be `None` for ``!=``, ``AND`` and ``IN`` expressions.
    """

    def setUp(self):
        """Create a fresh universe and parser for each test."""
        self.universe = DimensionUniverse()
        self.parser = ParserYacc()

    def _inspect(self, expression, bind):
        """Parse ``expression`` and return the InspectionVisitor summary.

        Parameters
        ----------
        expression : `str`
            Expression text passed to the parser.
        bind : `dict`
            Bind mapping passed to the visitor.
        """
        tree = self.parser.parse(expression)
        return tree.visit(InspectionVisitor(self.universe, bind))

    def _assert_no_data_id(self, summary):
        """Assert that the summary carries no data ID key or value."""
        self.assertIsNone(summary.dataIdKey)
        self.assertIsNone(summary.dataIdValue)

    def test_simple(self):
        """Test for simple expressions"""
        visit_dimensions = {"instrument", "visit", "band", "physical_filter"}

        summary = self._inspect("instrument = 'LSST'", {})
        self.assertEqual(summary.dimensions, {"instrument"})
        self.assertFalse(summary.columns)
        self.assertFalse(summary.hasIngestDate)
        self.assertEqual(summary.dataIdKey, self.universe["instrument"])
        self.assertEqual(summary.dataIdValue, "LSST")

        summary = self._inspect("instrument != 'LSST'", {})
        self.assertEqual(summary.dimensions, {"instrument"})
        self.assertFalse(summary.columns)
        self._assert_no_data_id(summary)

        summary = self._inspect("instrument = 'LSST' AND visit = 1", {})
        self.assertEqual(summary.dimensions, visit_dimensions)
        self.assertFalse(summary.columns)
        self._assert_no_data_id(summary)

        summary = self._inspect("instrument = 'LSST' AND visit = 1 AND skymap = 'x'", {})
        self.assertEqual(summary.dimensions, visit_dimensions | {"skymap"})
        self.assertFalse(summary.columns)
        self._assert_no_data_id(summary)

    def test_bind(self):
        """Test for simple expressions with binds."""
        visit_dimensions = {"instrument", "visit", "band", "physical_filter"}

        bind = {"instr": "LSST"}
        summary = self._inspect("instrument = instr", bind)
        self.assertEqual(summary.dimensions, {"instrument"})
        self.assertFalse(summary.hasIngestDate)
        self.assertEqual(summary.dataIdKey, self.universe["instrument"])
        self.assertEqual(summary.dataIdValue, "LSST")

        # Visit the new tree *before* asserting on the summary; previously the
        # dimensions assertion ran first and re-checked the summary of the
        # preceding expression.
        summary = self._inspect("instrument != instr", bind)
        self.assertEqual(summary.dimensions, {"instrument"})
        self._assert_no_data_id(summary)

        bind = {"instr": "LSST", "visit_id": 1}
        summary = self._inspect("instrument = instr AND visit = visit_id", bind)
        self.assertEqual(summary.dimensions, visit_dimensions)
        self._assert_no_data_id(summary)

        bind = {"instr": "LSST", "visit_id": 1, "skymap_name": "x"}
        summary = self._inspect("instrument = 'LSST' AND visit = 1 AND skymap = skymap_name", bind)
        self.assertEqual(summary.dimensions, visit_dimensions | {"skymap"})
        self._assert_no_data_id(summary)

    def test_in(self):
        """Test for IN expressions."""
        visit_dimensions = {"instrument", "visit", "band", "physical_filter"}

        summary = self._inspect("instrument IN ('LSST')", {})
        self.assertEqual(summary.dimensions, {"instrument"})
        self.assertFalse(summary.hasIngestDate)
        # we do not handle IN with a single item as `=`
        self._assert_no_data_id(summary)

        summary = self._inspect("instrument IN (instr)", {"instr": "LSST"})
        self.assertEqual(summary.dimensions, {"instrument"})
        self._assert_no_data_id(summary)

        summary = self._inspect("visit IN (1,2,3)", {})
        self.assertEqual(summary.dimensions, visit_dimensions)
        self._assert_no_data_id(summary)

        summary = self._inspect("visit IN (visit1, visit2, visit3)", {"visit1": 1, "visit2": 2, "visit3": 3})
        self.assertEqual(summary.dimensions, visit_dimensions)
        self._assert_no_data_id(summary)

        summary = self._inspect("visit IN (visits)", {"visits": (1, 2, 3)})
        self.assertEqual(summary.dimensions, visit_dimensions)
        self._assert_no_data_id(summary)

347 

348 

class CheckVisitorTestCase(unittest.TestCase):
    """Tests for CheckVisitor class."""

    def test_governor(self):
        """Test with governor dimension in expression

        Nothing is asserted about the visit results; the test passes when
        none of the visits raises.
        """
        parser = ParserYacc()

        universe = DimensionUniverse()
        dimensions = universe.conform(("instrument", "visit"))
        data_id = DataCoordinate.make_empty(universe)
        defaults = DataCoordinate.make_empty(universe)

        cases = (
            # governor-only constraint, with the literal on either side
            ("instrument = 'LSST'", {}),
            ("'LSST' = instrument", {}),
            # use bind for governor
            ("instrument = instr", {"instr": "LSST"}),
        )
        # Plain loop (no subTest) so an error in one case stops the test.
        for text, binds in cases:
            normalized = NormalFormExpression.fromTree(parser.parse(text), NormalForm.DISJUNCTIVE)
            normalized.visit(CheckVisitor(data_id, dimensions, binds, defaults))

380 

381 

# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()