Coverage for tests/test_butlerUtils.py: 14%
294 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-17 04:24 -0700
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-17 04:24 -0700
1# This file is part of summit_utils.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
import os
import unittest
import unittest.mock
from typing import Iterable
import datetime
import random
import copy

import lsst.utils.tests
from lsst.summit.utils.butlerUtils import (makeDefaultLatissButler,
                                           updateDataId,
                                           sanitize_day_obs,
                                           getMostRecentDayObs,
                                           getSeqNumsForDayObs,
                                           getMostRecentDataId,
                                           getDatasetRefForDataId,
                                           _dayobs_present,
                                           _seqnum_present,
                                           _expid_present,
                                           _get_dayobs_key,
                                           _get_seqnum_key,
                                           _get_expid_key,
                                           getDayObs,
                                           getSeqNum,
                                           getExpId,
                                           datasetExists,
                                           sortRecordsByDayObsThenSeqNum,
                                           getDaysWithData,
                                           getExpIdFromDayObsSeqNum,
                                           updateDataIdOrDataCord,
                                           fillDataId,
                                           getExpRecordFromDataId,
                                           getDayObsSeqNumFromExposureId,
                                           getLatissOnSkyDataIds,
                                           _assureDict,
                                           getLatissDefaultCollections,
                                           RECENT_DAY,
                                           )
from lsst.summit.utils.butlerUtils import removeDataProduct  # noqa: F401
import lsst.daf.butler as dafButler
from lsst.resources import ResourcePath
class ButlerUtilsTestCase(lsst.utils.tests.TestCase):
    """Tests for the helper functions in lsst.summit.utils.butlerUtils.

    Every test uses the butler built in setUp(); if that butler cannot be
    created the tests are skipped.
    """

    def setUp(self):
        """Build the butler and the different dataId flavours used below.

        Raises
        ------
        unittest.SkipTest
            Raised if makeDefaultLatissButler() raises FileNotFoundError.
        """
        # this also functions as test_makeDefaultLatissButler(), but we may as
        # well catch the butler once it's made so it can be reused if needed,
        # given how hard it is to make it robustly

        # butler stuff
        try:
            self.butler = makeDefaultLatissButler()
        except FileNotFoundError:
            raise unittest.SkipTest("Skipping tests that require the LATISS butler repo.")
        self.assertIsInstance(self.butler, dafButler.Butler)

        # dict-like dataIds
        self.rawDataId = getMostRecentDataId(self.butler)
        self.fullId = fillDataId(self.butler, self.rawDataId)
        self.assertIn('exposure', self.fullId)
        self.assertIn('day_obs', self.fullId)
        self.assertIn('seq_num', self.fullId)
        self.expIdOnly = {'exposure': self.fullId['exposure'], 'detector': 0}
        self.dayObsSeqNumIdOnly = {'day_obs': getDayObs(self.fullId), 'seq_num': getSeqNum(self.fullId),
                                   'detector': 0}

        # expRecords
        self.expRecordNoDetector = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(self.expRecordNoDetector, dafButler.dimensions.DimensionRecord)
        self.assertFalse(hasattr(self.expRecordNoDetector, 'detector'))
        self.assertNotIn('detector', self.expRecordNoDetector.dataId)
        # just a crosscheck on the above to make sure other things are correct
        self.assertTrue(hasattr(self.expRecordNoDetector, 'instrument'))

        # data coordinates
        # popping here because butler.registry.expandDataId cannot have
        # day_obs or seq_num present right now
        rawDataIdNoDayObSeqNum = _assureDict(self.rawDataId)
        if dayObsKey := _get_dayobs_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(dayObsKey)
        if seqNumKey := _get_seqnum_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(seqNumKey)
        self.rawDataIdNoDayObSeqNum = rawDataIdNoDayObSeqNum
        self.dataCoordMinimal = self.butler.registry.expandDataId(self.rawDataIdNoDayObSeqNum, detector=0)
        self.dataCoordFullView = self.butler.registry.expandDataId(self.rawDataIdNoDayObSeqNum,
                                                                   detector=0).full
        self.assertIsInstance(self.dataCoordMinimal, dafButler.dimensions.DataCoordinate)
        # NB the type check below is currently using a non-public API, but
        # at present there isn't a good alternative
        viewType = dafButler.core.dimensions._coordinate._DataCoordinateFullView
        self.assertIsInstance(self.dataCoordFullView, viewType)

    def test_getLatissDefaultCollections(self):
        """The default collections must be a non-empty, non-None value."""
        defaultCollections = getLatissDefaultCollections()
        self.assertIsNotNone(defaultCollections)
        self.assertNotEqual(defaultCollections, [])
        self.assertGreaterEqual(len(defaultCollections), 1)

    def test_RECENT_DAY(self):
        """RECENT_DAY must be a plausible past date inside the data range."""
        todayInt = int(datetime.date.today().strftime("%Y%m%d"))
        self.assertLessEqual(RECENT_DAY, todayInt)  # in the past
        self.assertGreaterEqual(RECENT_DAY, 20200101)  # not too far in the past

        # check that the value of RECENT_DAY is before the end of the data.
        daysWithData = getDaysWithData(self.butler)
        self.assertLessEqual(RECENT_DAY, max(daysWithData))

        # no test here, but print a warning if it hasn't been updated recently
        recentDay_datetime = datetime.datetime.strptime(str(RECENT_DAY), "%Y%m%d")
        now = datetime.datetime.today()
        timeSinceUpdate = now - recentDay_datetime
        if timeSinceUpdate.days > 100:  # informational only, never fails the test
            print(f"RECENT_DAY is now {timeSinceUpdate.days} days in the past. "
                  "You might want to consider updating this to speed up butler queries.")

    def test_sanitize_day_obs(self):
        """Valid str/int inputs are converted; invalid inputs raise."""
        dayObs = '2020-01-02'
        self.assertEqual(sanitize_day_obs(dayObs), 20200102)
        dayObs = 20210201
        self.assertEqual(sanitize_day_obs(dayObs), dayObs)

        # Each bad value needs its own assertRaises context: once the first
        # call raises, nothing after it in the same block is executed.
        for badValue in (1.234, 'Febuary 29th, 1970'):
            with self.assertRaises(ValueError):
                sanitize_day_obs(badValue)

    def test_getMostRecentDayObs(self):
        """The most recent dayObs is an int in a plausible range."""
        # just a basic sanity check here as we can't know the value,
        # but at least check something is returned, and is plausible
        recentDay = getMostRecentDayObs(self.butler)
        self.assertIsInstance(recentDay, int)
        self.assertGreaterEqual(recentDay, RECENT_DAY)
        # some test data might be set a millennium in the future, i.e.
        # the year would be 2XXX+1000, so set to y4k just in case
        self.assertLess(recentDay, 40000000)

    def test_getSeqNumsForDayObs(self):
        """An empty day yields no seqNums; the most recent day yields some."""
        emptyDay = 19990101
        seqnums = getSeqNumsForDayObs(self.butler, emptyDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertEqual(len(list(seqnums)), 0)

        recentDay = getMostRecentDayObs(self.butler)
        seqnums = getSeqNumsForDayObs(self.butler, recentDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertGreaterEqual(len(list(seqnums)), 1)

    def test_getMostRecentDataId(self):
        """The most recent dataId is a dict carrying the expected keys."""
        # we can't know the values, but it should always return something
        # and the dict and int forms should always have certain keys and agree
        dataId = getMostRecentDataId(self.butler)
        self.assertIsInstance(dataId, dict)
        self.assertIn('day_obs', dataId)
        self.assertIn('seq_num', dataId)
        self.assertTrue('exposure' in dataId or 'exposure.id' in dataId)

    def test_getDatasetRefForDataId(self):
        """A DatasetRef is returned for every supported dataId flavour."""
        dataIds = (self.rawDataId, self.rawDataIdNoDayObSeqNum, self.dataCoordMinimal,
                   self.dataCoordFullView)
        for dataId in dataIds:
            dRef = getDatasetRefForDataId(self.butler, 'raw', dataId)
            # use the public name rather than the internal core.datasets.ref path
            self.assertIsInstance(dRef, dafButler.DatasetRef)

    def test__dayobs_present(self):
        """Both spellings of the day_obs key are detected; others are not."""
        goods = [{'day_obs': 123}, {'exposure.day_obs': 234}, {'day_obs': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_dayobs_present(good))
        for bad in bads:
            self.assertFalse(_dayobs_present(bad))

    def test__seqnum_present(self):
        """Both spellings of the seq_num key are detected; others are not."""
        goods = [{'seq_num': 123}, {'exposure.seq_num': 234}, {'seq_num': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_seqnum_present(good))
        for bad in bads:
            self.assertFalse(_seqnum_present(bad))

    def test__expid_present(self):
        """Both spellings of the exposure key are detected; others are not."""
        goods = [{'exposure': 123}, {'exposure.id': 234}, {'exposure.id': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_expid_present(good))
        for bad in bads:
            self.assertFalse(_expid_present(bad))

    def test_getDayObs(self):
        """getDayObs returns the value when present and None otherwise."""
        dayVal = 98765
        goods = [{'day_obs': dayVal}, {'exposure.day_obs': dayVal}, {'day_obs': dayVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getDayObs(good), dayVal)
        for bad in bads:
            self.assertIsNone(getDayObs(bad))

    def test_getSeqNum(self):
        """getSeqNum returns the value when present and None otherwise."""
        seqVal = 12345
        goods = [{'seq_num': seqVal}, {'exposure.seq_num': seqVal}, {'seq_num': seqVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getSeqNum(good), seqVal)
        for bad in bads:
            self.assertIsNone(getSeqNum(bad))

    def test_getExpId(self):
        """getExpId returns the value when present and None otherwise."""
        expIdVal = 12345
        goods = [{'exposure': expIdVal}, {'exposure.id': expIdVal}, {'exposure': expIdVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getExpId(good), expIdVal)
        for bad in bads:
            self.assertIsNone(getExpId(bad))

    def test_datasetExists(self):
        """The raw dataset is found via each of the dict-style dataIds."""
        self.assertTrue(datasetExists(self.butler, 'raw', self.rawDataId))
        self.assertTrue(datasetExists(self.butler, 'raw', self.expIdOnly))
        self.assertTrue(datasetExists(self.butler, 'raw', self.dayObsSeqNumIdOnly))

    def test_sortRecordsByDayObsThenSeqNum(self):
        """Shuffled records come back ordered; duplicated records raise."""
        where = "exposure.day_obs=day_obs"
        expRecords = list(self.butler.registry.queryDimensionRecords("exposure", where=where,
                                                                     bind={'day_obs': RECENT_DAY}))
        self.assertGreaterEqual(len(expRecords), 1)  # just ensure we're not doing a no-op test
        random.shuffle(expRecords)  # they are often already in order, so make sure they're not
        sortedIds = sortRecordsByDayObsThenSeqNum(expRecords)
        # all records share one day_obs, so seq_num must be strictly increasing
        for current, following in zip(sortedIds, sortedIds[1:]):
            self.assertLess(current.seq_num, following.seq_num)

        # Check that ambiguous sorts raise as expected. The duplicate is added
        # outside the assertRaises block so that only the sort itself can
        # satisfy the expected ValueError.
        withDuplicate = expRecords + expRecords[:1]
        with self.assertRaises(ValueError):
            sortRecordsByDayObsThenSeqNum(withDuplicate)

    def test_getDaysWithData(self):
        """At least one day with data is returned, as ints."""
        days = getDaysWithData(self.butler)
        # must be non-empty: a ">= 0" check can never fail, and days[0] below
        # would error rather than fail on an empty result
        self.assertGreaterEqual(len(days), 1)
        self.assertIsInstance(days[0], int)

    def test_getExpIdFromDayObsSeqNum(self):
        """The returned id contains an exposure key."""
        expId = getExpIdFromDayObsSeqNum(self.butler, self.dayObsSeqNumIdOnly)
        self.assertTrue(_expid_present(expId))

    def test_updateDataIdOrDataCord(self):
        """Updated values can be read back for every id flavour."""
        updateVals = {'testKey': 'testValue'}

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for originalId in ids:
            testId = updateDataIdOrDataCord(originalId, **updateVals)
            for k, v in updateVals.items():
                self.assertEqual(testId[k], v)

    def test_fillDataId(self):
        """fillDataId adds day_obs, seq_num and exposure to each id flavour."""
        self.assertFalse(_dayobs_present(self.expIdOnly))
        self.assertFalse(_seqnum_present(self.expIdOnly))

        fullId = fillDataId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(fullId))
        self.assertTrue(_seqnum_present(fullId))

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for dataId in ids:
            fullId = fillDataId(self.butler, dataId)
            self.assertTrue(_dayobs_present(fullId))
            self.assertTrue(_seqnum_present(fullId))
            self.assertTrue(_expid_present(fullId))

    def test_getExpRecordFromDataId(self):
        """A DimensionRecord is returned for the raw dataId."""
        record = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(record, dafButler.dimensions.DimensionRecord)

    def test_getDayObsSeqNumFromExposureId(self):
        """The result contains both a day_obs and a seq_num key."""
        dayObsSeqNum = getDayObsSeqNumFromExposureId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(dayObsSeqNum))
        self.assertTrue(_seqnum_present(dayObsSeqNum))

    def test_removeDataProduct(self):
        """Placeholder: can't think of an easy or safe test for this."""

    def test_getLatissOnSkyDataIds(self):
        """Ids are returned for the most recent day, with and without full."""
        # This is very slow, consider removing as it's the least important of
        # all the util functions. However, restricting it to only the most
        # recent day does help a lot, so probably OK like that, and should
        # speed up with middleware improvements in the future, and we should
        # ensure that they don't break this, so inclined to leave for now
        dayToUse = getDaysWithData(self.butler)[-1]
        # the most recent day with data might only be biases or flats so make
        # sure to override the default of skipping biases, darks & flats
        skipTypes = ()
        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])

        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse,
                                    full=True)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])
        testId = ids[0]
        self.assertTrue(_dayobs_present(testId))
        self.assertTrue(_seqnum_present(testId))
        self.assertTrue(_expid_present(testId))

    def test__assureDict(self):
        """Every id flavour is converted to a plain dict."""
        for item in [self.rawDataId, self.fullId, self.expIdOnly,
                     self.expRecordNoDetector, self.dataCoordFullView,
                     self.dataCoordMinimal, self.rawDataIdNoDayObSeqNum]:
            testId = _assureDict(item)
            self.assertIsInstance(testId, dict)

    def test__get_dayobs_key(self):
        """The day_obs key in use is returned, or None if absent."""
        dataId = {'a_random_key': 321, 'exposure.day_obs': 20200312, 'z_random_key': 'abc'}
        self.assertEqual(_get_dayobs_key(dataId), 'exposure.day_obs')
        dataId = {'day_obs': 20200312}
        self.assertEqual(_get_dayobs_key(dataId), 'day_obs')
        dataId = {'missing': 20200312}
        self.assertIsNone(_get_dayobs_key(dataId))

    def test__get_seqnum_key(self):
        """The seq_num key in use is returned, or None if absent."""
        dataId = {'a_random_key': 321, 'exposure.seq_num': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_seqnum_key(dataId), 'exposure.seq_num')
        dataId = {'seq_num': 123}
        self.assertEqual(_get_seqnum_key(dataId), 'seq_num')
        dataId = {'missing': 123}
        self.assertIsNone(_get_seqnum_key(dataId))

    def test__get_expid_key(self):
        """The exposure key in use is returned, or None if absent."""
        dataId = {'a_random_key': 321, 'exposure.id': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_expid_key(dataId), 'exposure.id')
        dataId = {'a_random_key': 321, 'exposure': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_expid_key(dataId), 'exposure')
        dataId = {'missing': 123}
        self.assertIsNone(_get_expid_key(dataId))

    def test_updateDataId(self):
        """updateDataId adds a detector to both dataCoordinates and dicts."""
        # check with a dataCoordinate
        dataId = copy.copy(self.expRecordNoDetector.dataId)
        self.assertNotIn('detector', dataId)
        dataId = updateDataId(dataId, detector=123)
        self.assertIn('detector', dataId)
        self.assertEqual(dataId['detector'], 123)

        # check with a dict
        self.assertIsInstance(self.rawDataId, dict)
        dataId = copy.copy(self.rawDataId)
        dataId.pop('detector')
        self.assertNotIn('detector', dataId)
        dataId = updateDataId(dataId, detector=321)
        self.assertIn('detector', dataId)
        self.assertEqual(dataId['detector'], 321)
class ButlerInitTestCase(lsst.utils.tests.TestCase):
    """Separately test whether we can make a butler with the env var set
    and that the expected error type is raised and passed through when it is
    not, as this is relied upon to correctly skip tests when butler init is
    not possible.
    """

    def test_dafButlerRaiseTypes(self):
        """Check the exception types raised by dafButler.Butler itself."""
        # If DAF_BUTLER_REPOSITORY_INDEX is not set *at all* then
        # using an instrument label raises a FileNotFoundError
        with unittest.mock.patch.dict(os.environ):
            # pop with a default: the variable may or may not be set already
            os.environ.pop('DAF_BUTLER_REPOSITORY_INDEX', None)
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('LATISS')

        # If DAF_BUTLER_REPOSITORY_INDEX is present but is just an empty
        # string then using a label raises a RuntimeError
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ''}):
            with self.assertRaises(RuntimeError):
                dafButler.Butler('LATISS')

        # If DAF_BUTLER_REPOSITORY_INDEX _is_ set, we can't rely on any given
        # camera existing, but we can check that we get the expected error
        # when trying to init an instrument which definitely won't be defined.
        if os.getenv('DAF_BUTLER_REPOSITORY_INDEX'):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('NotAValidCameraName')

    def test_makeDefaultLatissButlerRaiseTypes(self):
        """makeDefaultLatissButler unifies the mixed exception types from
        butler inits, so test all available possibilities here.
        """
        # variable not set at all
        with unittest.mock.patch.dict(os.environ):
            os.environ.pop('DAF_BUTLER_REPOSITORY_INDEX', None)
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable set to an empty string
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ''}):
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable pointing at a file which does not exist
        fakeFile = '/path/to/a/file/which/does/not_exist.yaml'
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": fakeFile}):
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

    def test_DAF_BUTLER_REPOSITORY_INDEX_value(self):
        """A truthy DAF_BUTLER_REPOSITORY_INDEX must point to a real file."""
        # If DAF_BUTLER_REPOSITORY_INDEX is truthy then we expect it to point
        # to an actual file
        repoFile = os.getenv('DAF_BUTLER_REPOSITORY_INDEX')
        if repoFile:
            self.assertTrue(ResourcePath(repoFile).exists())
class TestMemory(lsst.utils.tests.MemoryTestCase):
    """Memory test case, inherited unchanged from lsst.utils.tests."""
def setup_module(module):
    """Module-level setup hook; runs the lsst.utils.tests initialisation.

    Parameters
    ----------
    module
        The module being set up. Unused here.
    """
    lsst.utils.tests.init()
if __name__ == "__main__":
    # When executed as a script: run the lsst.utils.tests initialisation,
    # then hand over to the unittest runner.
    lsst.utils.tests.init()
    unittest.main()