Coverage for tests/test_butlerUtils.py: 13%
307 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-03 04:44 -0700
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-03 04:44 -0700
1# This file is part of summit_utils.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
import copy
import datetime
import os
import random
import unittest
import unittest.mock
from typing import Iterable

import lsst.daf.butler as dafButler
import lsst.utils.tests
from lsst.daf.butler import DatasetRef, NamedKeyMapping
from lsst.resources import ResourcePath
from lsst.summit.utils.butlerUtils import removeDataProduct  # noqa: F401
from lsst.summit.utils.butlerUtils import (
    RECENT_DAY,
    _assureDict,
    _dayobs_present,
    _expid_present,
    _get_dayobs_key,
    _get_expid_key,
    _get_seqnum_key,
    _seqnum_present,
    datasetExists,
    fillDataId,
    getDatasetRefForDataId,
    getDayObs,
    getDayObsSeqNumFromExposureId,
    getDaysWithData,
    getExpId,
    getExpIdFromDayObsSeqNum,
    getExpRecord,
    getExpRecordFromDataId,
    getLatissDefaultCollections,
    getLatissOnSkyDataIds,
    getMostRecentDataId,
    getMostRecentDayObs,
    getSeqNum,
    getSeqNumsForDayObs,
    makeDefaultLatissButler,
    sanitizeDayObs,
    sortRecordsByDayObsThenSeqNum,
    updateDataId,
    updateDataIdOrDataCord,
)


class ButlerUtilsTestCase(lsst.utils.tests.TestCase):
    """Tests for the helper functions imported from
    lsst.summit.utils.butlerUtils.

    setUp builds a LATISS butler and a set of equivalent data identifiers in
    several forms (dicts, an exposure record, data coordinates). If
    makeDefaultLatissButler raises FileNotFoundError the tests are skipped.
    """

    def setUp(self):
        # this also functions as test_makeDefaultLatissButler(), but we may as
        # well catch the butler once it's made so it can be reused if needed,
        # given how hard it is to make it robustly

        # butler stuff
        try:
            self.butler = makeDefaultLatissButler()
        except FileNotFoundError:
            raise unittest.SkipTest("Skipping tests that require the LATISS butler repo.")
        self.assertIsInstance(self.butler, dafButler.Butler)

        # dict-like dataIds
        self.rawDataId = getMostRecentDataId(self.butler)
        self.fullId = fillDataId(self.butler, self.rawDataId)
        for key in ("exposure", "day_obs", "seq_num"):
            self.assertIn(key, self.fullId)
        self.expIdOnly = {"exposure": self.fullId["exposure"], "detector": 0}
        self.dayObsSeqNumIdOnly = {
            "day_obs": getDayObs(self.fullId),
            "seq_num": getSeqNum(self.fullId),
            "detector": 0,
        }

        # expRecords
        self.expRecordNoDetector = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(self.expRecordNoDetector, dafButler.dimensions.DimensionRecord)
        self.assertFalse(hasattr(self.expRecordNoDetector, "detector"))
        self.assertNotIn("detector", self.expRecordNoDetector.dataId)
        # just a crosscheck on the above to make sure other things are correct
        self.assertTrue(hasattr(self.expRecordNoDetector, "instrument"))

        # data coordinates
        # popping here because butler.registry.expandDataId cannot have
        # day_obs or seq_num present right now
        rawDataIdNoDayObSeqNum = _assureDict(self.rawDataId)
        if dayObsKey := _get_dayobs_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(dayObsKey)
        if seqNumKey := _get_seqnum_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(seqNumKey)
        self.rawDataIdNoDayObSeqNum = rawDataIdNoDayObSeqNum
        # expand once and derive the full view from the same coordinate
        # instead of repeating an identical registry call
        self.dataCoordMinimal = self.butler.registry.expandDataId(self.rawDataIdNoDayObSeqNum, detector=0)
        self.dataCoordFullView = self.dataCoordMinimal.full
        self.assertIsInstance(self.dataCoordMinimal, dafButler.dimensions.DataCoordinate)
        self.assertIsInstance(self.dataCoordFullView, NamedKeyMapping)

    def test_getLatissDefaultCollections(self):
        defaultCollections = getLatissDefaultCollections()
        self.assertIsNotNone(defaultCollections)
        self.assertNotEqual(defaultCollections, [])
        self.assertGreaterEqual(len(defaultCollections), 1)

    def test_RECENT_DAY(self):
        todayInt = int(datetime.date.today().strftime("%Y%m%d"))
        self.assertLessEqual(RECENT_DAY, todayInt)  # in the past
        self.assertGreaterEqual(RECENT_DAY, 20200101)  # not too far in the past

        # check that the value of RECENT_DAY is before the end of the data.
        daysWithData = getDaysWithData(self.butler)
        self.assertLessEqual(RECENT_DAY, max(daysWithData))

        # no test here, but print a warning if it hasn't been updated recently
        recentDay_datetime = datetime.datetime.strptime(str(RECENT_DAY), "%Y%m%d")
        now = datetime.datetime.today()
        timeSinceUpdate = now - recentDay_datetime
        if timeSinceUpdate.days > 100:  # TODO:
            print(
                f"RECENT_DAY is now {timeSinceUpdate.days} days in the past. "
                "You might want to consider updating this to speed up butler queries."
            )

    def test_sanitizeDayObs(self):
        dayObs = "2020-01-02"
        self.assertEqual(sanitizeDayObs(dayObs), 20200102)
        dayObs = 20210201
        self.assertEqual(sanitizeDayObs(dayObs), dayObs)

        # one assertRaises block per bad input: a shared block exits on the
        # first exception, so later calls in it would never be executed
        for badValue in (1.234, "Febuary 29th, 1970"):
            with self.assertRaises(ValueError):
                sanitizeDayObs(badValue)

    def test_getMostRecentDayObs(self):
        # just a basic sanity check here as we can't know the value,
        # but at least check something is returned, and is plausible
        recentDay = getMostRecentDayObs(self.butler)
        self.assertIsInstance(recentDay, int)
        self.assertGreaterEqual(recentDay, RECENT_DAY)
        # some test data might be set a millennium in the future, i.e.
        # the year would be 2XXX+1000, so set to y4k just in case
        self.assertLess(recentDay, 40000000)

    def test_getSeqNumsForDayObs(self):
        emptyDay = 19990101
        seqnums = getSeqNumsForDayObs(self.butler, emptyDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertEqual(len(list(seqnums)), 0)

        recentDay = getMostRecentDayObs(self.butler)
        seqnums = getSeqNumsForDayObs(self.butler, recentDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertGreaterEqual(len(list(seqnums)), 1)

    def test_getMostRecentDataId(self):
        # we can't know the values, but it should always return something
        # and the dict and int forms should always have certain keys and agree
        dataId = getMostRecentDataId(self.butler)
        self.assertIsInstance(dataId, dict)
        self.assertIn("day_obs", dataId)
        self.assertIn("seq_num", dataId)
        self.assertTrue("exposure" in dataId or "exposure.id" in dataId)

    def test_getDatasetRefForDataId(self):
        # every supported dataId flavour must resolve to a DatasetRef
        for dataId in (
            self.rawDataId,
            self.rawDataIdNoDayObSeqNum,
            self.dataCoordMinimal,
            self.dataCoordFullView,
        ):
            dRef = getDatasetRefForDataId(self.butler, "raw", dataId)
            self.assertIsInstance(dRef, DatasetRef)

    def test__dayobs_present(self):
        goods = [{"day_obs": 123}, {"exposure.day_obs": 234}, {"day_obs": 345, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertTrue(_dayobs_present(good))
        for bad in bads:
            self.assertFalse(_dayobs_present(bad))

    def test__seqnum_present(self):
        goods = [{"seq_num": 123}, {"exposure.seq_num": 234}, {"seq_num": 345, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertTrue(_seqnum_present(good))
        for bad in bads:
            self.assertFalse(_seqnum_present(bad))

    def test__expid_present(self):
        goods = [{"exposure": 123}, {"exposure.id": 234}, {"exposure.id": 345, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertTrue(_expid_present(good))
        for bad in bads:
            self.assertFalse(_expid_present(bad))

    def test_getDayObs(self):
        dayVal = 98765
        goods = [{"day_obs": dayVal}, {"exposure.day_obs": dayVal}, {"day_obs": dayVal, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertEqual(getDayObs(good), dayVal)
        for bad in bads:
            self.assertIsNone(getDayObs(bad))

    def test_getSeqNum(self):
        seqVal = 12345
        goods = [{"seq_num": seqVal}, {"exposure.seq_num": seqVal}, {"seq_num": seqVal, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertEqual(getSeqNum(good), seqVal)
        for bad in bads:
            self.assertIsNone(getSeqNum(bad))

    def test_getExpId(self):
        expIdVal = 12345
        goods = [{"exposure": expIdVal}, {"exposure.id": expIdVal}, {"exposure": expIdVal, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertEqual(getExpId(good), expIdVal)
        for bad in bads:
            self.assertIsNone(getExpId(bad))

    def test_datasetExists(self):
        for dataId in (self.rawDataId, self.expIdOnly, self.dayObsSeqNumIdOnly):
            self.assertTrue(datasetExists(self.butler, "raw", dataId))

    def test_sortRecordsByDayObsThenSeqNum(self):
        where = "exposure.day_obs=dayObs"
        expRecords = self.butler.registry.queryDimensionRecords(
            "exposure", where=where, bind={"dayObs": RECENT_DAY}
        )
        expRecords = list(expRecords)
        self.assertGreaterEqual(len(expRecords), 1)  # just ensure we're not doing a no-op test
        random.shuffle(expRecords)  # they are often already in order, so make sure they're not
        sortedIds = sortRecordsByDayObsThenSeqNum(expRecords)
        # all records come from a single day_obs, so seq_num alone must be
        # strictly increasing between neighbours
        for earlier, later in zip(sortedIds, sortedIds[1:]):
            self.assertLess(earlier.seq_num, later.seq_num)

        # Check that ambiguous sorts raise as expected. The input is built
        # outside the assertRaises block so that only the sort itself can
        # satisfy the expectation.
        expRecords = self.butler.registry.queryDimensionRecords(
            "exposure", where=where, bind={"dayObs": RECENT_DAY}
        )
        expRecords = list(expRecords)
        self.assertGreaterEqual(len(expRecords), 1)  # just ensure we're not doing a no-op test
        expRecords.append(expRecords[0])  # add a duplicate
        with self.assertRaises(ValueError):
            sortRecordsByDayObsThenSeqNum(expRecords)

    def test_getDaysWithData(self):
        days = getDaysWithData(self.butler)
        # must be non-empty, otherwise the element check below cannot run
        self.assertGreater(len(days), 0)
        self.assertIsInstance(days[0], int)

    def test_getExpIdFromDayObsSeqNum(self):
        expId = getExpIdFromDayObsSeqNum(self.butler, self.dayObsSeqNumIdOnly)
        self.assertTrue(_expid_present(expId))

    def test_updateDataIdOrDataCord(self):
        updateVals = {"testKey": "testValue"}

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for originalId in ids:
            testId = updateDataIdOrDataCord(originalId, **updateVals)
            for k, v in updateVals.items():
                self.assertEqual(testId[k], v)

    def test_fillDataId(self):
        self.assertFalse(_dayobs_present(self.expIdOnly))
        self.assertFalse(_seqnum_present(self.expIdOnly))

        fullId = fillDataId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(fullId))
        self.assertTrue(_seqnum_present(fullId))

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for dataId in ids:
            fullId = fillDataId(self.butler, dataId)
            self.assertTrue(_dayobs_present(fullId))
            self.assertTrue(_seqnum_present(fullId))
            self.assertTrue(_expid_present(fullId))

    def test_getExpRecordFromDataId(self):
        record = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(record, dafButler.dimensions.DimensionRecord)

    def test_getDayObsSeqNumFromExposureId(self):
        dayObsSeqNum = getDayObsSeqNumFromExposureId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(dayObsSeqNum))
        self.assertTrue(_seqnum_present(dayObsSeqNum))

    def test_removeDataProduct(self):
        # Can't think of an easy or safe test for this
        pass

    def test_getLatissOnSkyDataIds(self):
        # This is very slow, consider removing as it's the least important of
        # all the util functions. However, restricting it to only the most
        # recent day does help a lot, so probably OK like that, and should
        # speed up with middleware improvements in the future, and we should
        # ensure that they don't break this, so inclined to leave for now
        dayToUse = getDaysWithData(self.butler)[-1]
        # the most recent day with data might only be biases or flats so make
        # sure to override the default of skipping biases, darks & flats
        skipTypes = ()
        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])

        ids = getLatissOnSkyDataIds(
            self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse, full=True
        )
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])
        testId = ids[0]
        self.assertTrue(_dayobs_present(testId))
        self.assertTrue(_seqnum_present(testId))
        self.assertTrue(_expid_present(testId))

    def test__assureDict(self):
        for item in [
            self.rawDataId,
            self.fullId,
            self.expIdOnly,
            self.expRecordNoDetector,
            self.dataCoordFullView,
            self.dataCoordMinimal,
            self.rawDataIdNoDayObSeqNum,
        ]:
            testId = _assureDict(item)
            self.assertIsInstance(testId, dict)

    def test__get_dayobs_key(self):
        dataId = {"a_random_key": 321, "exposure.day_obs": 20200312, "z_random_key": "abc"}
        self.assertEqual(_get_dayobs_key(dataId), "exposure.day_obs")
        dataId = {"day_obs": 20200312}
        self.assertEqual(_get_dayobs_key(dataId), "day_obs")
        dataId = {"missing": 20200312}
        self.assertIsNone(_get_dayobs_key(dataId))

    def test__get_seqnum_key(self):
        dataId = {"a_random_key": 321, "exposure.seq_num": 123, "z_random_key": "abc"}
        self.assertEqual(_get_seqnum_key(dataId), "exposure.seq_num")
        dataId = {"seq_num": 123}
        self.assertEqual(_get_seqnum_key(dataId), "seq_num")
        dataId = {"missing": 123}
        self.assertIsNone(_get_seqnum_key(dataId))

    def test__get_expid_key(self):
        dataId = {"a_random_key": 321, "exposure.id": 123, "z_random_key": "abc"}
        self.assertEqual(_get_expid_key(dataId), "exposure.id")
        dataId = {"a_random_key": 321, "exposure": 123, "z_random_key": "abc"}
        self.assertEqual(_get_expid_key(dataId), "exposure")
        dataId = {"missing": 123}
        self.assertIsNone(_get_expid_key(dataId))

    def test_updateDataId(self):
        # check with a dataCoordinate
        dataId = copy.copy(self.expRecordNoDetector.dataId)
        self.assertNotIn("detector", dataId)
        dataId = updateDataId(dataId, detector=123)
        self.assertIn("detector", dataId)
        self.assertEqual(dataId["detector"], 123)

        # check with a dict
        self.assertIsInstance(self.rawDataId, dict)
        dataId = copy.copy(self.rawDataId)
        dataId.pop("detector")
        self.assertNotIn("detector", dataId)
        dataId = updateDataId(dataId, detector=321)
        self.assertIn("detector", dataId)
        self.assertEqual(dataId["detector"], 321)

    def test_getExpRecord(self):
        expId = self.expIdOnly["exposure"]
        dayObs = self.dayObsSeqNumIdOnly["day_obs"]
        seqNum = self.dayObsSeqNumIdOnly["seq_num"]

        recordByExpId = getExpRecord(self.butler, "LATISS", expId=expId)
        self.assertIsInstance(recordByExpId, dafButler.dimensions.DimensionRecord)

        recordByDayObsSeqNum = getExpRecord(self.butler, "LATISS", dayObs=dayObs, seqNum=seqNum)
        self.assertIsInstance(recordByDayObsSeqNum, dafButler.dimensions.DimensionRecord)
        self.assertEqual(recordByExpId, recordByDayObsSeqNum)

        with self.assertRaises(ValueError):
            # because we need dayObs too, so immediate raise due to bad args
            _ = getExpRecord(self.butler, "LATISS", seqNum=seqNum)

        with self.assertRaises(RuntimeError):
            # (dayObs, seqNum) no longer matches the expId, so there are no
            # results, which is a RuntimeError
            _ = getExpRecord(self.butler, "LATISS", expId=expId, dayObs=dayObs, seqNum=seqNum + 1)
class ButlerInitTestCase(lsst.utils.tests.TestCase):
    """Separately test whether we can make a butler with the env var set
    and that the expected error type is raised and passed through when it is
    not, as this is relied upon to correctly skip tests when butler init is
    not possible.
    """

    def test_dafButlerRaiseTypes(self):
        # If DAF_BUTLER_REPOSITORY_INDEX is not set *at all* then
        # using an instrument label raises a FileNotFoundError
        with unittest.mock.patch.dict(os.environ):
            # patch.dict restores the mapping on exit, so dropping the
            # variable here only affects this block; pop with a default
            # copes with the variable being absent to begin with
            os.environ.pop("DAF_BUTLER_REPOSITORY_INDEX", None)
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler("LATISS")

        # If DAF_BUTLER_REPOSITORY_INDEX is present but is just an empty
        # string then using a label is expected to raise a FileNotFoundError
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ""}):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler("LATISS")

        # If DAF_BUTLER_REPOSITORY_INDEX _is_ set, we can't rely on any given
        # camera existing, but we can check that we get the expected error
        # when trying to init an instrument which definitely won't be defined.
        if os.getenv("DAF_BUTLER_REPOSITORY_INDEX"):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler("NotAValidCameraName")

    def test_makeDefaultLatissButlerRaiseTypes(self):
        """makeDefaultLatissButler unifies the mixed exception types from
        butler inits, so test all available possibilities here.
        """
        # variable not set at all
        with unittest.mock.patch.dict(os.environ):
            os.environ.pop("DAF_BUTLER_REPOSITORY_INDEX", None)
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable set, first to an empty string and then to a path which
        # does not exist
        fakeFile = "/path/to/a/file/which/does/not_exist.yaml"
        for indexValue in ("", fakeFile):
            with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": indexValue}):
                with self.assertRaises(FileNotFoundError):
                    makeDefaultLatissButler()

    def test_DAF_BUTLER_REPOSITORY_INDEX_value(self):
        # If DAF_BUTLER_REPOSITORY_INDEX is truthy then we expect it to point
        # to an actual file
        repoFile = os.getenv("DAF_BUTLER_REPOSITORY_INDEX")
        if repoFile:
            self.assertTrue(ResourcePath(repoFile).exists())
class TestMemory(lsst.utils.tests.MemoryTestCase):
    """Run the checks inherited from lsst.utils.tests.MemoryTestCase.

    Nothing is added or overridden here.
    """
def setup_module(module):
    """Initialise the lsst.utils.tests machinery for this module.

    Presumably this is the module-level setup hook picked up by the test
    runner (confirm); the ``module`` argument is accepted but not used.
    """
    lsst.utils.tests.init()
# Script entry point: perform the same initialisation as setup_module and
# then hand over to the unittest runner.
if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()