Coverage for tests/test_butlerUtils.py: 13%
307 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-09 14:32 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-09 14:32 +0000
1# This file is part of summit_utils.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
import copy
import datetime
import os
import random
import unittest
import unittest.mock
from typing import Iterable

import lsst.utils.tests
from lsst.summit.utils.butlerUtils import (
    makeDefaultLatissButler,
    updateDataId,
    sanitizeDayObs,
    getMostRecentDayObs,
    getSeqNumsForDayObs,
    getMostRecentDataId,
    getDatasetRefForDataId,
    _dayobs_present,
    _seqnum_present,
    _expid_present,
    _get_dayobs_key,
    _get_seqnum_key,
    _get_expid_key,
    getDayObs,
    getSeqNum,
    getExpId,
    datasetExists,
    sortRecordsByDayObsThenSeqNum,
    getDaysWithData,
    getExpIdFromDayObsSeqNum,
    updateDataIdOrDataCord,
    fillDataId,
    getExpRecordFromDataId,
    getDayObsSeqNumFromExposureId,
    getLatissOnSkyDataIds,
    _assureDict,
    getLatissDefaultCollections,
    RECENT_DAY,
    getExpRecord,
)
from lsst.summit.utils.butlerUtils import removeDataProduct  # noqa: F401
import lsst.daf.butler as dafButler
from lsst.daf.butler import DatasetRef, NamedKeyMapping
from lsst.resources import ResourcePath
class ButlerUtilsTestCase(lsst.utils.tests.TestCase):
    """Tests for the helper functions imported from
    lsst.summit.utils.butlerUtils.

    setUp() builds a LATISS butler and a set of equivalent data identifiers
    in different forms (dict, exposure record, data coordinate). If the
    butler cannot be made the tests are skipped.
    """

    def setUp(self):
        # this also functions as test_makeDefaultLatissButler(), but we may as
        # well catch the butler once it's made so it can be reused if needed,
        # given how hard it is to make it robustly

        # butler stuff
        try:
            self.butler = makeDefaultLatissButler()
        except FileNotFoundError:
            raise unittest.SkipTest("Skipping tests that require the LATISS butler repo.")
        self.assertIsInstance(self.butler, dafButler.Butler)

        # dict-like dataIds
        self.rawDataId = getMostRecentDataId(self.butler)
        self.fullId = fillDataId(self.butler, self.rawDataId)
        self.assertIn('exposure', self.fullId)
        self.assertIn('day_obs', self.fullId)
        self.assertIn('seq_num', self.fullId)
        self.expIdOnly = {'exposure': self.fullId['exposure'], 'detector': 0}
        self.dayObsSeqNumIdOnly = {'day_obs': getDayObs(self.fullId), 'seq_num': getSeqNum(self.fullId),
                                   'detector': 0}

        # expRecords
        self.expRecordNoDetector = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(self.expRecordNoDetector, dafButler.dimensions.DimensionRecord)
        self.assertFalse(hasattr(self.expRecordNoDetector, 'detector'))
        self.assertNotIn('detector', self.expRecordNoDetector.dataId)
        # just a crosscheck on the above to make sure other things are correct
        self.assertTrue(hasattr(self.expRecordNoDetector, 'instrument'))

        # data coordinates
        # popping here because butler.registry.expandDataId cannot have
        # day_obs or seq_num present right now
        rawDataIdNoDayObSeqNum = _assureDict(self.rawDataId)
        if dayObsKey := _get_dayobs_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(dayObsKey)
        if seqNumKey := _get_seqnum_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(seqNumKey)
        self.rawDataIdNoDayObSeqNum = rawDataIdNoDayObSeqNum
        self.dataCoordMinimal = self.butler.registry.expandDataId(self.rawDataIdNoDayObSeqNum, detector=0)
        self.dataCoordFullView = self.butler.registry.expandDataId(self.rawDataIdNoDayObSeqNum,
                                                                   detector=0).full
        self.assertIsInstance(self.dataCoordMinimal, dafButler.dimensions.DataCoordinate)
        self.assertIsInstance(self.dataCoordFullView, NamedKeyMapping)

    def test_getLatissDefaultCollections(self):
        defaultCollections = getLatissDefaultCollections()
        self.assertIsNotNone(defaultCollections)
        self.assertNotEqual(defaultCollections, [])
        self.assertGreaterEqual(len(defaultCollections), 1)

    def test_RECENT_DAY(self):
        todayInt = int(datetime.date.today().strftime("%Y%m%d"))
        self.assertLessEqual(RECENT_DAY, todayInt)  # in the past
        self.assertGreaterEqual(RECENT_DAY, 20200101)  # not too far in the past

        # check that the value of RECENT_DAY is before the end of the data.
        daysWithData = getDaysWithData(self.butler)
        self.assertLessEqual(RECENT_DAY, max(daysWithData))

        # no test here, but print a warning if it hasn't been updated recently
        recentDay_datetime = datetime.datetime.strptime(str(RECENT_DAY), "%Y%m%d")
        now = datetime.datetime.today()
        timeSinceUpdate = now - recentDay_datetime
        if timeSinceUpdate.days > 100:  # only warn, never fail, on a stale value
            print(f"RECENT_DAY is now {timeSinceUpdate.days} days in the past. "
                  "You might want to consider updating this to speed up butler queries.")

    def test_sanitizeDayObs(self):
        dayObs = '2020-01-02'
        self.assertEqual(sanitizeDayObs(dayObs), 20200102)
        dayObs = 20210201
        self.assertEqual(sanitizeDayObs(dayObs), dayObs)

        # Each bad input needs its own context manager: the first raise leaves
        # the with-block, so a second call in the same block would never run.
        with self.assertRaises(ValueError):
            sanitizeDayObs(1.234)
        with self.assertRaises(ValueError):
            sanitizeDayObs('Febuary 29th, 1970')

    def test_getMostRecentDayObs(self):
        # just a basic sanity check here as we can't know the value,
        # but at least check something is returned, and is plausible
        recentDay = getMostRecentDayObs(self.butler)
        self.assertIsInstance(recentDay, int)
        self.assertGreaterEqual(recentDay, RECENT_DAY)
        # some test data might be set a millennium in the future, i.e.
        # the year would be 2XXX+1000, so set to y4k just in case
        self.assertLess(recentDay, 40000000)

    def test_getSeqNumsForDayObs(self):
        emptyDay = 19990101
        seqnums = getSeqNumsForDayObs(self.butler, emptyDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertEqual(len(list(seqnums)), 0)

        recentDay = getMostRecentDayObs(self.butler)
        seqnums = getSeqNumsForDayObs(self.butler, recentDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertGreaterEqual(len(list(seqnums)), 1)

    def test_getMostRecentDataId(self):
        # we can't know the values, but it should always return something
        # and the dict and int forms should always have certain keys and agree
        dataId = getMostRecentDataId(self.butler)
        self.assertIsInstance(dataId, dict)
        self.assertIn('day_obs', dataId)
        self.assertIn('seq_num', dataId)
        self.assertTrue('exposure' in dataId or 'exposure.id' in dataId)

    def test_getDatasetRefForDataId(self):
        # every supported flavour of data identifier must resolve to a ref
        for dataId in (self.rawDataId, self.rawDataIdNoDayObSeqNum,
                       self.dataCoordMinimal, self.dataCoordFullView):
            dRef = getDatasetRefForDataId(self.butler, 'raw', dataId)
            self.assertIsInstance(dRef, DatasetRef)

    def test__dayobs_present(self):
        goods = [{'day_obs': 123}, {'exposure.day_obs': 234}, {'day_obs': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_dayobs_present(good))
        for bad in bads:
            self.assertFalse(_dayobs_present(bad))

    def test__seqnum_present(self):
        goods = [{'seq_num': 123}, {'exposure.seq_num': 234}, {'seq_num': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_seqnum_present(good))
        for bad in bads:
            self.assertFalse(_seqnum_present(bad))

    def test__expid_present(self):
        goods = [{'exposure': 123}, {'exposure.id': 234}, {'exposure.id': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_expid_present(good))
        for bad in bads:
            self.assertFalse(_expid_present(bad))

    def test_getDayObs(self):
        dayVal = 98765
        goods = [{'day_obs': dayVal}, {'exposure.day_obs': dayVal}, {'day_obs': dayVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getDayObs(good), dayVal)
        for bad in bads:
            self.assertIsNone(getDayObs(bad))

    def test_getSeqNum(self):
        seqVal = 12345
        goods = [{'seq_num': seqVal}, {'exposure.seq_num': seqVal}, {'seq_num': seqVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getSeqNum(good), seqVal)
        for bad in bads:
            self.assertIsNone(getSeqNum(bad))

    def test_getExpId(self):
        expIdVal = 12345
        goods = [{'exposure': expIdVal}, {'exposure.id': expIdVal}, {'exposure': expIdVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getExpId(good), expIdVal)
        for bad in bads:
            self.assertIsNone(getExpId(bad))

    def test_datasetExists(self):
        self.assertTrue(datasetExists(self.butler, 'raw', self.rawDataId))
        self.assertTrue(datasetExists(self.butler, 'raw', self.expIdOnly))
        self.assertTrue(datasetExists(self.butler, 'raw', self.dayObsSeqNumIdOnly))

    def test_sortRecordsByDayObsThenSeqNum(self):
        where = "exposure.day_obs=day_obs"
        expRecords = self.butler.registry.queryDimensionRecords("exposure", where=where,
                                                                bind={'day_obs': RECENT_DAY})
        expRecords = list(expRecords)
        self.assertGreaterEqual(len(expRecords), 1)  # just ensure we're not doing a no-op test
        random.shuffle(expRecords)  # they are often already in order, so make sure they're not
        sortedIds = sortRecordsByDayObsThenSeqNum(expRecords)
        for _id, nextId in zip(sortedIds, sortedIds[1:]):
            self.assertLess(_id.seq_num, nextId.seq_num)

        # Check that ambiguous sorts raise as expected. The setup is kept
        # outside the assertRaises block so that only the sort itself can
        # satisfy the expected ValueError.
        expRecords = self.butler.registry.queryDimensionRecords("exposure", where=where,
                                                                bind={'day_obs': RECENT_DAY})
        expRecords = list(expRecords)
        self.assertGreaterEqual(len(expRecords), 1)  # just ensure we're not doing a no-op test
        expRecords.append(expRecords[0])  # add a duplicate
        with self.assertRaises(ValueError):
            sortRecordsByDayObsThenSeqNum(expRecords)

    def test_getDaysWithData(self):
        days = getDaysWithData(self.butler)
        # must be non-empty: the first element is inspected below
        self.assertGreaterEqual(len(days), 1)
        self.assertIsInstance(days[0], int)

    def test_getExpIdFromDayObsSeqNum(self):
        expId = getExpIdFromDayObsSeqNum(self.butler, self.dayObsSeqNumIdOnly)
        self.assertTrue(_expid_present(expId))

    def test_updateDataIdOrDataCord(self):
        updateVals = {'testKey': 'testValue'}

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for originalId in ids:
            testId = updateDataIdOrDataCord(originalId, **updateVals)
            for k, v in updateVals.items():
                self.assertEqual(testId[k], v)

    def test_fillDataId(self):
        self.assertFalse(_dayobs_present(self.expIdOnly))
        self.assertFalse(_seqnum_present(self.expIdOnly))

        fullId = fillDataId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(fullId))
        self.assertTrue(_seqnum_present(fullId))

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for dataId in ids:
            fullId = fillDataId(self.butler, dataId)
            self.assertTrue(_dayobs_present(fullId))
            self.assertTrue(_seqnum_present(fullId))
            self.assertTrue(_expid_present(fullId))

    def test_getExpRecordFromDataId(self):
        record = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(record, dafButler.dimensions.DimensionRecord)

    def test_getDayObsSeqNumFromExposureId(self):
        dayObsSeqNum = getDayObsSeqNumFromExposureId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(dayObsSeqNum))
        self.assertTrue(_seqnum_present(dayObsSeqNum))

    def test_removeDataProduct(self):
        # Can't think of an easy or safe test for this, so this is
        # deliberately a placeholder which asserts nothing.
        return

    def test_getLatissOnSkyDataIds(self):
        # This is very slow, consider removing as it's the least important of
        # all the util functions. However, restricting it to only the most
        # recent day does help a lot, so probably OK like that, and should
        # speed up with middleware improvements in the future, and we should
        # ensure that they don't break this, so inclined to leave for now
        dayToUse = getDaysWithData(self.butler)[-1]
        # the most recent day with data might only be biases or flats so make
        # sure to override the default of skipping biases, darks & flats
        skipTypes = ()
        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])

        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse,
                                    full=True)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])
        testId = ids[0]
        self.assertTrue(_dayobs_present(testId))
        self.assertTrue(_seqnum_present(testId))
        self.assertTrue(_expid_present(testId))

    def test__assureDict(self):
        for item in [self.rawDataId, self.fullId, self.expIdOnly,
                     self.expRecordNoDetector, self.dataCoordFullView,
                     self.dataCoordMinimal, self.rawDataIdNoDayObSeqNum]:
            testId = _assureDict(item)
            self.assertIsInstance(testId, dict)

    def test__get_dayobs_key(self):
        dataId = {'a_random_key': 321, 'exposure.day_obs': 20200312, 'z_random_key': 'abc'}
        self.assertEqual(_get_dayobs_key(dataId), 'exposure.day_obs')
        dataId = {'day_obs': 20200312}
        self.assertEqual(_get_dayobs_key(dataId), 'day_obs')
        dataId = {'missing': 20200312}
        self.assertIsNone(_get_dayobs_key(dataId))

    def test__get_seqnum_key(self):
        dataId = {'a_random_key': 321, 'exposure.seq_num': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_seqnum_key(dataId), 'exposure.seq_num')
        dataId = {'seq_num': 123}
        self.assertEqual(_get_seqnum_key(dataId), 'seq_num')
        dataId = {'missing': 123}
        self.assertIsNone(_get_seqnum_key(dataId))

    def test__get_expid_key(self):
        dataId = {'a_random_key': 321, 'exposure.id': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_expid_key(dataId), 'exposure.id')
        dataId = {'a_random_key': 321, 'exposure': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_expid_key(dataId), 'exposure')
        dataId = {'missing': 123}
        self.assertIsNone(_get_expid_key(dataId))

    def test_updateDataId(self):
        # check with a dataCoordinate
        dataId = copy.copy(self.expRecordNoDetector.dataId)
        self.assertNotIn('detector', dataId)
        dataId = updateDataId(dataId, detector=123)
        self.assertIn('detector', dataId)
        self.assertEqual(dataId['detector'], 123)

        # check with a dict
        self.assertIsInstance(self.rawDataId, dict)
        dataId = copy.copy(self.rawDataId)
        dataId.pop('detector')
        self.assertNotIn('detector', dataId)
        dataId = updateDataId(dataId, detector=321)
        self.assertIn('detector', dataId)
        self.assertEqual(dataId['detector'], 321)

    def test_getExpRecord(self):
        expId = self.expIdOnly['exposure']
        dayObs = self.dayObsSeqNumIdOnly['day_obs']
        seqNum = self.dayObsSeqNumIdOnly['seq_num']

        recordByExpId = getExpRecord(self.butler, 'LATISS', expId=expId)
        self.assertIsInstance(recordByExpId, dafButler.dimensions.DimensionRecord)

        recordByDayObsSeqNum = getExpRecord(self.butler, 'LATISS', dayObs=dayObs, seqNum=seqNum)
        self.assertIsInstance(recordByDayObsSeqNum, dafButler.dimensions.DimensionRecord)
        self.assertEqual(recordByExpId, recordByDayObsSeqNum)

        with self.assertRaises(ValueError):
            # because we need dayObs too, so immediate raise due to bad args
            _ = getExpRecord(self.butler, 'LATISS', seqNum=seqNum)

        with self.assertRaises(RuntimeError):
            # (dayObs, seqNum) no longer matches the expId, so there are no
            # results, which is a RuntimeError
            _ = getExpRecord(self.butler, 'LATISS', expId=expId, dayObs=dayObs, seqNum=seqNum+1)
class ButlerInitTestCase(lsst.utils.tests.TestCase):
    """Separately test whether we can make a butler with the env var set
    and that the expected error type is raised and passed through when it is
    not, as this is relied upon to correctly skip tests when butler init is
    not possible.
    """

    def test_dafButlerRaiseTypes(self):
        # If DAF_BUTLER_REPOSITORY_INDEX is not set *at all* then
        # using an instrument label raises a FileNotFoundError
        with unittest.mock.patch.dict('os.environ'):
            # patch.dict restores the environment on exit, so removing the
            # variable here cannot leak into other tests
            os.environ.pop('DAF_BUTLER_REPOSITORY_INDEX', None)
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('LATISS')

        # If DAF_BUTLER_REPOSITORY_INDEX is present but is just an empty
        # string then using a label is expected to raise a FileNotFoundError
        # as well
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ''}):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('LATISS')

        # If DAF_BUTLER_REPOSITORY_INDEX _is_ set, we can't rely on any given
        # camera existing, but we can check that we get the expected error
        # when trying to init an instrument which definitely won't be defined.
        if os.getenv('DAF_BUTLER_REPOSITORY_INDEX'):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('NotAValidCameraName')

    def test_makeDefaultLatissButlerRaiseTypes(self):
        """makeDefaultLatissButler unifies the mixed exception types from
        butler inits, so test all available possibilities here.
        """
        # variable not set at all
        with unittest.mock.patch.dict('os.environ'):
            os.environ.pop('DAF_BUTLER_REPOSITORY_INDEX', None)
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable set, but empty
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ''}):
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable set to a path that is not expected to exist
        fakeFile = '/path/to/a/file/which/does/not_exist.yaml'
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": fakeFile}):
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

    def test_DAF_BUTLER_REPOSITORY_INDEX_value(self):
        # If DAF_BUTLER_REPOSITORY_INDEX is truthy then we expect it to point
        # to an actual file
        repoFile = os.getenv('DAF_BUTLER_REPOSITORY_INDEX')
        if repoFile:
            self.assertTrue(ResourcePath(repoFile).exists())
class TestMemory(lsst.utils.tests.MemoryTestCase):
    """Run the checks inherited from lsst.utils.tests.MemoryTestCase.

    Nothing is added or overridden here.
    """
def setup_module(module):
    """Module-level setup hook: call lsst.utils.tests.init().

    The ``module`` argument is accepted but not used.
    """
    lsst.utils.tests.init()
if __name__ == "__main__":
    # Direct execution as a script: perform the same init call as
    # setup_module(), then hand over to the unittest runner.
    lsst.utils.tests.init()
    unittest.main()