Coverage for tests / test_butlerUtils.py: 14%
307 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-22 09:17 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-22 09:17 +0000
1# This file is part of summit_utils.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
import copy
import datetime
import os
import random
import unittest
import unittest.mock
from typing import Iterable

import lsst.daf.butler as dafButler
import lsst.utils.tests
from lsst.daf.butler import DatasetRef
from lsst.resources import ResourcePath
from lsst.summit.utils.butlerUtils import removeDataProduct  # noqa: F401
from lsst.summit.utils.butlerUtils import (
    RECENT_DAY,
    _assureDict,
    _dayobs_present,
    _expid_present,
    _get_dayobs_key,
    _get_expid_key,
    _get_seqnum_key,
    _seqnum_present,
    fillDataId,
    getDatasetRefForDataId,
    getDayObs,
    getDayObsSeqNumFromExposureId,
    getDaysWithData,
    getExpId,
    getExpIdFromDayObsSeqNum,
    getExpRecord,
    getExpRecordFromDataId,
    getLatissDefaultCollections,
    getLatissOnSkyDataIds,
    getMostRecentDataId,
    getMostRecentDayObs,
    getSeqNum,
    getSeqNumsForDayObs,
    getSite,
    makeDefaultLatissButler,
    sanitizeDayObs,
    sortRecordsByDayObsThenSeqNum,
    updateDataId,
    updateDataIdOrDataCord,
)
class ButlerUtilsTestCase(lsst.utils.tests.TestCase):
    """Tests for the helper functions imported from
    ``lsst.summit.utils.butlerUtils``.

    ``setUp`` builds a LATISS butler and a set of equivalent data identifiers
    in several forms (plain dicts, an exposure record and a data coordinate)
    which the individual tests reuse. The tests are skipped when ``getSite()``
    reports ``"jenkins"`` or when making the butler raises
    `FileNotFoundError`.
    """

    def setUp(self):
        # this also functions as test_makeDefaultLatissButler(), but we may as
        # well catch the butler once it's made so it can be reused if needed,
        # given how hard it is to make it robustly

        # butler stuff
        try:
            if getSite() == "jenkins":
                raise unittest.SkipTest("Skip running butler-driven tests in Jenkins.")
            self.butler = makeDefaultLatissButler()
        except FileNotFoundError:
            raise unittest.SkipTest("Skipping tests that require the LATISS butler repo.")
        self.assertIsInstance(self.butler, dafButler.Butler)

        # dict-like dataIds
        self.rawDataId = getMostRecentDataId(self.butler)
        self.fullId = fillDataId(self.butler, self.rawDataId)
        self.assertIn("exposure", self.fullId)
        self.assertIn("day_obs", self.fullId)
        self.assertIn("seq_num", self.fullId)
        self.expIdOnly = {"exposure": self.fullId["exposure"], "detector": 0}
        self.dayObsSeqNumIdOnly = {
            "day_obs": getDayObs(self.fullId),
            "seq_num": getSeqNum(self.fullId),
            "detector": 0,
        }

        # expRecords
        self.expRecordNoDetector = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(self.expRecordNoDetector, dafButler.DimensionRecord)
        self.assertFalse(hasattr(self.expRecordNoDetector, "detector"))
        self.assertNotIn("detector", self.expRecordNoDetector.dataId)
        # just a crosscheck on the above to make sure other things are correct
        self.assertTrue(hasattr(self.expRecordNoDetector, "instrument"))

        # data coordinates
        # popping here because butler.registry.expandDataId cannot have
        # day_obs or seq_num present right now
        rawDataIdNoDayObSeqNum = _assureDict(self.rawDataId)
        if dayObsKey := _get_dayobs_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(dayObsKey)
        if seqNumKey := _get_seqnum_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(seqNumKey)
        self.rawDataIdNoDayObSeqNum = rawDataIdNoDayObSeqNum
        self.dataCoordMinimal = self.butler.registry.expandDataId(self.rawDataIdNoDayObSeqNum, detector=0)
        self.assertIsInstance(self.dataCoordMinimal, dafButler.DataCoordinate)

    def test_getLatissDefaultCollections(self):
        defaultCollections = getLatissDefaultCollections()
        self.assertIsNotNone(defaultCollections)
        self.assertNotEqual(defaultCollections, [])
        self.assertGreaterEqual(len(defaultCollections), 1)

    def test_RECENT_DAY(self):
        todayInt = int(datetime.date.today().strftime("%Y%m%d"))
        self.assertLessEqual(RECENT_DAY, todayInt)  # in the past
        self.assertGreaterEqual(RECENT_DAY, 20200101)  # not too far in the past

        # check that the value of RECENT_DAY is before the end of the data.
        daysWithData = getDaysWithData(self.butler)
        self.assertLessEqual(RECENT_DAY, max(daysWithData))

        # no test here, but print a warning if it hasn't been updated recently
        recentDay_datetime = datetime.datetime.strptime(str(RECENT_DAY), "%Y%m%d")
        now = datetime.datetime.today()
        timeSinceUpdate = now - recentDay_datetime
        if timeSinceUpdate.days > 100:
            print(
                f"RECENT_DAY is now {timeSinceUpdate.days} days in the past. "
                "You might want to consider updating this to speed up butler queries."
            )

    def test_sanitizeDayObs(self):
        dayObs = "2020-01-02"
        self.assertEqual(sanitizeDayObs(dayObs), 20200102)
        dayObs = 20210201
        self.assertEqual(sanitizeDayObs(dayObs), dayObs)

        # Each bad input gets its own assertRaises block: the first raise
        # leaves the block, so a second call in the same block would never
        # be executed.
        with self.assertRaises(ValueError):
            sanitizeDayObs(1.234)
        with self.assertRaises(ValueError):
            sanitizeDayObs("Febuary 29th, 1970")

    def test_getMostRecentDayObs(self):
        # just a basic sanity check here as we can't know the value,
        # but at least check something is returned, and is plausible
        recentDay = getMostRecentDayObs(self.butler)
        self.assertIsInstance(recentDay, int)
        self.assertGreaterEqual(recentDay, RECENT_DAY)
        # some test data might be set a millennium in the future, i.e.
        # the year would be 2XXX+1000, so set to y4k just in case
        self.assertLess(recentDay, 40000000)

    def test_getSeqNumsForDayObs(self):
        emptyDay = 19990101
        seqnums = getSeqNumsForDayObs(self.butler, emptyDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertEqual(len(list(seqnums)), 0)

        recentDay = getMostRecentDayObs(self.butler)
        seqnums = getSeqNumsForDayObs(self.butler, recentDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertGreaterEqual(len(list(seqnums)), 1)

    def test_getMostRecentDataId(self):
        # we can't know the values, but it should always return something
        # and the dict and int forms should always have certain keys and agree
        dataId = getMostRecentDataId(self.butler)
        self.assertIsInstance(dataId, dict)
        self.assertIn("day_obs", dataId)
        self.assertIn("seq_num", dataId)
        self.assertTrue("exposure" in dataId or "exposure.id" in dataId)

    def test_getDatasetRefForDataId(self):
        dRef = getDatasetRefForDataId(self.butler, "raw", self.rawDataId)
        self.assertIsInstance(dRef, DatasetRef)

        dRef = getDatasetRefForDataId(self.butler, "raw", self.rawDataIdNoDayObSeqNum)
        self.assertIsInstance(dRef, DatasetRef)
        dRef = getDatasetRefForDataId(self.butler, "raw", self.dataCoordMinimal)
        self.assertIsInstance(dRef, DatasetRef)

    def test__dayobs_present(self):
        goods = [{"day_obs": 123}, {"exposure.day_obs": 234}, {"day_obs": 345, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertTrue(_dayobs_present(good))
        for bad in bads:
            self.assertFalse(_dayobs_present(bad))

    def test__seqnum_present(self):
        goods = [{"seq_num": 123}, {"exposure.seq_num": 234}, {"seq_num": 345, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertTrue(_seqnum_present(good))
        for bad in bads:
            self.assertFalse(_seqnum_present(bad))

    def test__expid_present(self):
        goods = [{"exposure": 123}, {"exposure.id": 234}, {"exposure.id": 345, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertTrue(_expid_present(good))
        for bad in bads:
            self.assertFalse(_expid_present(bad))

    def test_getDayObs(self):
        dayVal = 98765
        goods = [{"day_obs": dayVal}, {"exposure.day_obs": dayVal}, {"day_obs": dayVal, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertEqual(getDayObs(good), dayVal)
        for bad in bads:
            self.assertIsNone(getDayObs(bad))

    def test_getSeqNum(self):
        seqVal = 12345
        goods = [{"seq_num": seqVal}, {"exposure.seq_num": seqVal}, {"seq_num": seqVal, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertEqual(getSeqNum(good), seqVal)
        for bad in bads:
            self.assertIsNone(getSeqNum(bad))

    def test_getExpId(self):
        expIdVal = 12345
        goods = [{"exposure": expIdVal}, {"exposure.id": expIdVal}, {"exposure": expIdVal, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertEqual(getExpId(good), expIdVal)
        for bad in bads:
            self.assertIsNone(getExpId(bad))

    def test_datasetExists(self):
        self.assertTrue(self.butler.exists("raw", self.rawDataId))
        self.assertTrue(self.butler.exists("raw", self.expIdOnly))
        self.assertTrue(self.butler.exists("raw", self.dayObsSeqNumIdOnly))

    def test_sortRecordsByDayObsThenSeqNum(self):
        where = "exposure.day_obs=dayObs"
        expRecords = list(
            self.butler.registry.queryDimensionRecords("exposure", where=where, bind={"dayObs": RECENT_DAY})
        )
        self.assertGreaterEqual(len(expRecords), 1)  # just ensure we're not doing a no-op test
        random.shuffle(expRecords)  # they are often already in order, so make sure they're not
        sortedIds = sortRecordsByDayObsThenSeqNum(expRecords)
        # all records share one day_obs, so seq_num must be strictly increasing
        for record, nextRecord in zip(sortedIds, sortedIds[1:]):
            self.assertLess(record.seq_num, nextRecord.seq_num)

        # Check that ambiguous sorts raise as expected. Only the sort call is
        # placed inside assertRaises so that a ValueError coming from anywhere
        # else cannot make this test pass by accident.
        withDuplicate = [*expRecords, expRecords[0]]  # add a duplicate
        with self.assertRaises(ValueError):
            sortRecordsByDayObsThenSeqNum(withDuplicate)

    def test_getDaysWithData(self):
        days = getDaysWithData(self.butler)
        # must be non-empty for the element check below to be meaningful
        self.assertGreater(len(days), 0)
        self.assertIsInstance(days[0], int)

    def test_getExpIdFromDayObsSeqNum(self):
        expId = getExpIdFromDayObsSeqNum(self.butler, self.dayObsSeqNumIdOnly)
        self.assertTrue(_expid_present(expId))

    def test_updateDataIdOrDataCord(self):
        updateVals = {"testKey": "testValue"}

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal]
        for originalId in ids:
            testId = updateDataIdOrDataCord(originalId, **updateVals)
            for k, v in updateVals.items():
                self.assertEqual(testId[k], v)

    def test_fillDataId(self):
        self.assertFalse(_dayobs_present(self.expIdOnly))
        self.assertFalse(_seqnum_present(self.expIdOnly))

        fullId = fillDataId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(fullId))
        self.assertTrue(_seqnum_present(fullId))

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal]
        for dataId in ids:
            fullId = fillDataId(self.butler, dataId)
            self.assertTrue(_dayobs_present(fullId))
            self.assertTrue(_seqnum_present(fullId))
            self.assertTrue(_expid_present(fullId))

    def test_getExpRecordFromDataId(self):
        record = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(record, dafButler.DimensionRecord)

    def test_getDayObsSeqNumFromExposureId(self):
        dayObsSeqNum = getDayObsSeqNumFromExposureId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(dayObsSeqNum))
        self.assertTrue(_seqnum_present(dayObsSeqNum))

    def test_removeDataProduct(self):
        # Can't think of an easy or safe test for this
        return

    def test_getLatissOnSkyDataIds(self):
        # This is very slow, consider removing as it's the least important of
        # all the util functions. However, restricting it to only the most
        # recent day does help a lot, so probably OK like that, and should
        # speed up with middleware improvements in the future, and we should
        # ensure that they don't break this, so inclined to leave for now
        dayToUse = getDaysWithData(self.butler)[-1]
        # the most recent day with data might only be biases or flats so make
        # sure to override the default of skipping biases, darks & flats
        skipTypes = ()
        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])

        ids = getLatissOnSkyDataIds(
            self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse, full=True
        )
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])
        testId = ids[0]
        self.assertTrue(_dayobs_present(testId))
        self.assertTrue(_seqnum_present(testId))
        self.assertTrue(_expid_present(testId))

    def test__assureDict(self):
        for item in [
            self.rawDataId,
            self.fullId,
            self.expIdOnly,
            self.expRecordNoDetector,
            self.dataCoordMinimal,
            self.rawDataIdNoDayObSeqNum,
        ]:
            testId = _assureDict(item)
            self.assertIsInstance(testId, dict)

    def test__get_dayobs_key(self):
        dataId = {"a_random_key": 321, "exposure.day_obs": 20200312, "z_random_key": "abc"}
        self.assertEqual(_get_dayobs_key(dataId), "exposure.day_obs")
        dataId = {"day_obs": 20200312}
        self.assertEqual(_get_dayobs_key(dataId), "day_obs")
        dataId = {"missing": 20200312}
        self.assertIsNone(_get_dayobs_key(dataId))

    def test__get_seqnum_key(self):
        dataId = {"a_random_key": 321, "exposure.seq_num": 123, "z_random_key": "abc"}
        self.assertEqual(_get_seqnum_key(dataId), "exposure.seq_num")
        dataId = {"seq_num": 123}
        self.assertEqual(_get_seqnum_key(dataId), "seq_num")
        dataId = {"missing": 123}
        self.assertIsNone(_get_seqnum_key(dataId))

    def test__get_expid_key(self):
        dataId = {"a_random_key": 321, "exposure.id": 123, "z_random_key": "abc"}
        self.assertEqual(_get_expid_key(dataId), "exposure.id")
        dataId = {"a_random_key": 321, "exposure": 123, "z_random_key": "abc"}
        self.assertEqual(_get_expid_key(dataId), "exposure")
        dataId = {"missing": 123}
        self.assertIsNone(_get_expid_key(dataId))

    def test_updateDataId(self):
        # check with a dataCoordinate
        dataId = copy.copy(self.expRecordNoDetector.dataId)
        self.assertNotIn("detector", dataId)
        dataId = updateDataId(dataId, detector=123)
        self.assertIn("detector", dataId)
        self.assertEqual(dataId["detector"], 123)

        # check with a dict
        self.assertIsInstance(self.rawDataId, dict)
        dataId = copy.copy(self.rawDataId)
        dataId.pop("detector")
        self.assertNotIn("detector", dataId)
        dataId = updateDataId(dataId, detector=321)
        self.assertIn("detector", dataId)
        self.assertEqual(dataId["detector"], 321)

    def test_getExpRecord(self):
        expId = self.expIdOnly["exposure"]
        dayObs = self.dayObsSeqNumIdOnly["day_obs"]
        seqNum = self.dayObsSeqNumIdOnly["seq_num"]

        recordByExpId = getExpRecord(self.butler, "LATISS", expId=expId)
        self.assertIsInstance(recordByExpId, dafButler.DimensionRecord)

        recordByDayObsSeqNum = getExpRecord(self.butler, "LATISS", dayObs=dayObs, seqNum=seqNum)
        self.assertIsInstance(recordByDayObsSeqNum, dafButler.DimensionRecord)
        self.assertEqual(recordByExpId, recordByDayObsSeqNum)

        with self.assertRaises(ValueError):
            # because we need dayObs too, so immediate raise due to bad args
            getExpRecord(self.butler, "LATISS", seqNum=seqNum)

        with self.assertRaises(RuntimeError):
            # (dayObs, seqNum) no longer matches the expId, so there are no
            # results, which is a RuntimeError
            getExpRecord(self.butler, "LATISS", expId=expId, dayObs=dayObs, seqNum=seqNum + 1)
class ButlerInitTestCase(lsst.utils.tests.TestCase):
    """Separately test whether we can make a butler with the env var set
    and that the expected error type is raised and passed through when it is
    not, as this is relied upon to correctly skip tests when butler init is
    not possible.
    """

    def test_dafButlerRaiseTypes(self):
        # If DAF_BUTLER_REPOSITORY_INDEX is not set *at all* then
        # using an instrument label raises a FileNotFoundError
        with unittest.mock.patch.dict(os.environ):
            # patch.dict restores the environment on exit; pop with a default
            # removes the variable whether or not it was set beforehand
            os.environ.pop("DAF_BUTLER_REPOSITORY_INDEX", None)
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler("LATISS")

        # If DAF_BUTLER_REPOSITORY_INDEX is present but is just an empty
        # string then using a label also raises a FileNotFoundError
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ""}):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler("LATISS")

        # If DAF_BUTLER_REPOSITORY_INDEX _is_ set, we can't rely on any given
        # camera existing, but we can check that we get the expected error
        # when trying to init an instrument which definitely won't be defined.
        if os.getenv("DAF_BUTLER_REPOSITORY_INDEX"):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler("NotAValidCameraName")

    def test_makeDefaultLatissButlerRaiseTypes(self):
        """makeDefaultLatissButler unifies the mixed exception types from
        butler inits, so test all available possibilities here.
        """
        if getSite() == "jenkins":
            raise unittest.SkipTest("Skip running butler-driven tests in Jenkins.")

        # variable not set at all
        with unittest.mock.patch.dict(os.environ):
            os.environ.pop("DAF_BUTLER_REPOSITORY_INDEX", None)
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable set to an empty string
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ""}):
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable pointing at a file which does not exist
        fakeFile = "/path/to/a/file/which/does/not_exist.yaml"
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": fakeFile}):
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

    def test_DAF_BUTLER_REPOSITORY_INDEX_value(self):
        # If DAF_BUTLER_REPOSITORY_INDEX is truthy then we expect it to point
        # to an actual file
        repoFile = os.getenv("DAF_BUTLER_REPOSITORY_INDEX")
        if repoFile:
            self.assertTrue(ResourcePath(repoFile).exists())
class TestMemory(lsst.utils.tests.MemoryTestCase):
    """Subclass of ``lsst.utils.tests.MemoryTestCase`` with nothing added, so
    that the inherited checks are collected as part of this module.
    """
def setup_module(module):
    """Call ``lsst.utils.tests.init()`` for this module.

    The ``module`` argument is accepted but not used. Presumably this is
    picked up by pytest as its module-level setup hook; confirm against the
    test runner configuration.
    """
    lsst.utils.tests.init()
if __name__ == "__main__":
    # Run directly as a script: do the same initialisation as setup_module,
    # then hand control to unittest.
    lsst.utils.tests.init()
    unittest.main()