Coverage for tests/test_butlerUtils.py: 14%

279 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-04 03:40 -0800

1# This file is part of summit_utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

import os
import unittest
import unittest.mock
from typing import Iterable
import datetime
import random

import lsst.utils.tests
from lsst.summit.utils.butlerUtils import (makeDefaultLatissButler,
                                           sanitize_day_obs,
                                           getMostRecentDayObs,
                                           getSeqNumsForDayObs,
                                           getMostRecentDataId,
                                           getDatasetRefForDataId,
                                           _dayobs_present,
                                           _seqnum_present,
                                           _expid_present,
                                           _get_dayobs_key,
                                           _get_seqnum_key,
                                           _get_expid_key,
                                           getDayObs,
                                           getSeqNum,
                                           getExpId,
                                           datasetExists,
                                           sortRecordsByDayObsThenSeqNum,
                                           getDaysWithData,
                                           getExpIdFromDayObsSeqNum,
                                           updateDataIdOrDataCord,
                                           fillDataId,
                                           getExpRecordFromDataId,
                                           getDayObsSeqNumFromExposureId,
                                           getLatissOnSkyDataIds,
                                           _assureDict,
                                           getLatissDefaultCollections,
                                           RECENT_DAY,
                                           )
from lsst.summit.utils.butlerUtils import removeDataProduct  # noqa: F401
import lsst.daf.butler as dafButler
from lsst.resources import ResourcePath

60 

61 

class ButlerUtilsTestCase(lsst.utils.tests.TestCase):
    """Tests for the helper functions in ``lsst.summit.utils.butlerUtils``.

    Every test relies on the butler built in `setUp`; if that butler cannot
    be constructed the tests are skipped rather than failed.
    """

    def setUp(self):
        """Create the butler and the dataId flavours shared by the tests.

        Raises
        ------
        unittest.SkipTest
            Raised if ``makeDefaultLatissButler`` raises `FileNotFoundError`.
        """
        # this also functions as test_makeDefaultLatissButler(), but we may as
        # well catch the butler once it's made so it can be reused if needed,
        # given how hard it is to make it robustly

        # butler stuff
        try:
            self.butler = makeDefaultLatissButler()
        except FileNotFoundError:
            raise unittest.SkipTest("Skipping tests that require the LATISS butler repo.")
        self.assertIsInstance(self.butler, dafButler.Butler)

        # dict-like dataIds
        self.rawDataId = getMostRecentDataId(self.butler)
        self.fullId = fillDataId(self.butler, self.rawDataId)
        self.assertIn('exposure', self.fullId)
        self.assertIn('day_obs', self.fullId)
        self.assertIn('seq_num', self.fullId)
        self.expIdOnly = {'exposure': self.fullId['exposure'], 'detector': 0}
        self.dayObsSeqNumIdOnly = {'day_obs': getDayObs(self.fullId), 'seq_num': getSeqNum(self.fullId),
                                   'detector': 0}

        # expRecords
        self.expRecordNoDetector = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(self.expRecordNoDetector, dafButler.dimensions.DimensionRecord)
        self.assertFalse(hasattr(self.expRecordNoDetector, 'detector'))
        # just a crosscheck on the above to make sure other things are correct
        self.assertTrue(hasattr(self.expRecordNoDetector, 'instrument'))

        # data coordinates
        # popping here because butler.registry.expandDataId cannot have
        # day_obs or seq_num present right now
        rawDataIdNoDayObSeqNum = _assureDict(self.rawDataId)
        if dayObsKey := _get_dayobs_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(dayObsKey)
        if seqNumKey := _get_seqnum_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(seqNumKey)
        self.rawDataIdNoDayObSeqNum = rawDataIdNoDayObSeqNum
        self.dataCoordMinimal = self.butler.registry.expandDataId(self.rawDataIdNoDayObSeqNum, detector=0)
        # the full view is taken from the coordinate expanded just above
        # rather than repeating the same registry call with identical inputs
        self.dataCoordFullView = self.dataCoordMinimal.full
        self.assertIsInstance(self.dataCoordMinimal, dafButler.dimensions.DataCoordinate)
        # NB the type check below is currently using a non-public API, but
        # at present there isn't a good alternative
        viewType = dafButler.core.dimensions._coordinate._DataCoordinateFullView
        self.assertIsInstance(self.dataCoordFullView, viewType)

    def test_getLatissDefaultCollections(self):
        defaultCollections = getLatissDefaultCollections()
        self.assertIsNotNone(defaultCollections)
        self.assertNotEqual(defaultCollections, [])
        self.assertGreaterEqual(len(defaultCollections), 1)

    def test_RECENT_DAY(self):
        todayInt = int(datetime.date.today().strftime("%Y%m%d"))
        self.assertLessEqual(RECENT_DAY, todayInt)  # in the past
        self.assertGreaterEqual(RECENT_DAY, 20200101)  # not too far in the past

        # check that the value of RECENT_DAY is before the end of the data.
        daysWithData = getDaysWithData(self.butler)
        self.assertLessEqual(RECENT_DAY, max(daysWithData))

        # no test here, but print a warning if it hasn't been updated recently
        recentDay_datetime = datetime.datetime.strptime(str(RECENT_DAY), "%Y%m%d")
        now = datetime.datetime.today()
        timeSinceUpdate = now - recentDay_datetime
        if timeSinceUpdate.days > 100:
            print(f"RECENT_DAY is now {timeSinceUpdate.days} days in the past. "
                  "You might want to consider updating this to speed up butler queries.")

    def test_sanitize_day_obs(self):
        dayObs = '2020-01-02'
        self.assertEqual(sanitize_day_obs(dayObs), 20200102)
        dayObs = 20210201
        self.assertEqual(sanitize_day_obs(dayObs), dayObs)

        # Each bad input gets its own assertRaises context: control leaves a
        # ``with`` block at the first exception, so a second call placed in
        # the same block would never be executed.
        for badValue in (1.234, 'Febuary 29th, 1970'):
            with self.subTest(badValue=badValue):
                with self.assertRaises(ValueError):
                    sanitize_day_obs(badValue)

    def test_getMostRecentDayObs(self):
        # just a basic sanity check here as we can't know the value,
        # but at least check something is returned, and is plausible
        recentDay = getMostRecentDayObs(self.butler)
        self.assertIsInstance(recentDay, int)
        self.assertGreaterEqual(recentDay, RECENT_DAY)
        # some test data might be set a millennium in the future, i.e.
        # the year would be 2XXX+1000, so set to y4k just in case
        self.assertLess(recentDay, 40000000)

    def test_getSeqNumsForDayObs(self):
        emptyDay = 19990101
        seqnums = getSeqNumsForDayObs(self.butler, emptyDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertEqual(len(list(seqnums)), 0)

        recentDay = getMostRecentDayObs(self.butler)
        seqnums = getSeqNumsForDayObs(self.butler, recentDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertGreaterEqual(len(list(seqnums)), 1)

    def test_getMostRecentDataId(self):
        # we can't know the values, but it should always return something
        # and the dict and int forms should always have certain keys and agree
        dataId = getMostRecentDataId(self.butler)
        self.assertIsInstance(dataId, dict)
        self.assertIn('day_obs', dataId)
        self.assertIn('seq_num', dataId)
        self.assertTrue('exposure' in dataId or 'exposure.id' in dataId)

    def test_getDatasetRefForDataId(self):
        # every dataId flavour built in setUp should resolve to a DatasetRef;
        # the class is referenced via the package-level export rather than
        # its internal module path
        dataIds = {
            'rawDataId': self.rawDataId,
            'rawDataIdNoDayObSeqNum': self.rawDataIdNoDayObSeqNum,
            'dataCoordMinimal': self.dataCoordMinimal,
            'dataCoordFullView': self.dataCoordFullView,
        }
        for label, dataId in dataIds.items():
            with self.subTest(dataId=label):
                dRef = getDatasetRefForDataId(self.butler, 'raw', dataId)
                self.assertIsInstance(dRef, dafButler.DatasetRef)

    def test__dayobs_present(self):
        goods = [{'day_obs': 123}, {'exposure.day_obs': 234}, {'day_obs': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_dayobs_present(good))
        for bad in bads:
            self.assertFalse(_dayobs_present(bad))

    def test__seqnum_present(self):
        goods = [{'seq_num': 123}, {'exposure.seq_num': 234}, {'seq_num': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_seqnum_present(good))
        for bad in bads:
            self.assertFalse(_seqnum_present(bad))

    def test__expid_present(self):
        goods = [{'exposure': 123}, {'exposure.id': 234}, {'exposure.id': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_expid_present(good))
        for bad in bads:
            self.assertFalse(_expid_present(bad))

    def test_getDayObs(self):
        dayVal = 98765
        goods = [{'day_obs': dayVal}, {'exposure.day_obs': dayVal}, {'day_obs': dayVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getDayObs(good), dayVal)
        for bad in bads:
            self.assertIsNone(getDayObs(bad))

    def test_getSeqNum(self):
        seqVal = 12345
        goods = [{'seq_num': seqVal}, {'exposure.seq_num': seqVal}, {'seq_num': seqVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getSeqNum(good), seqVal)
        for bad in bads:
            self.assertIsNone(getSeqNum(bad))

    def test_getExpId(self):
        expIdVal = 12345
        goods = [{'exposure': expIdVal}, {'exposure.id': expIdVal}, {'exposure': expIdVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getExpId(good), expIdVal)
        for bad in bads:
            self.assertIsNone(getExpId(bad))

    def test_datasetExists(self):
        self.assertTrue(datasetExists(self.butler, 'raw', self.rawDataId))
        self.assertTrue(datasetExists(self.butler, 'raw', self.expIdOnly))
        self.assertTrue(datasetExists(self.butler, 'raw', self.dayObsSeqNumIdOnly))

    def test_sortRecordsByDayObsThenSeqNum(self):
        where = "exposure.day_obs=day_obs"
        expRecords = list(self.butler.registry.queryDimensionRecords("exposure", where=where,
                                                                     bind={'day_obs': RECENT_DAY}))
        self.assertGreaterEqual(len(expRecords), 1)  # just ensure we're not doing a no-op test
        random.shuffle(expRecords)  # they are often already in order, so make sure they're not
        sortedIds = sortRecordsByDayObsThenSeqNum(expRecords)
        for current, following in zip(sortedIds, sortedIds[1:]):
            self.assertLess(current.seq_num, following.seq_num)

        # Check that ambiguous sorts raise as expected. Only the call under
        # test sits inside the context manager, so a ValueError raised while
        # preparing the input cannot satisfy the assertion by accident.
        withDuplicate = expRecords + [expRecords[0]]  # add a duplicate
        with self.assertRaises(ValueError):
            sortRecordsByDayObsThenSeqNum(withDuplicate)

    def test_getDaysWithData(self):
        days = getDaysWithData(self.butler)
        # must be non-empty: a ``>= 0`` length check can never fail and the
        # indexing below would then raise IndexError rather than fail cleanly
        self.assertGreater(len(days), 0)
        self.assertIsInstance(days[0], int)

    def test_getExpIdFromDayObsSeqNum(self):
        expId = getExpIdFromDayObsSeqNum(self.butler, self.dayObsSeqNumIdOnly)
        self.assertTrue(_expid_present(expId))

    def test_updateDataIdOrDataCord(self):
        updateVals = {'testKey': 'testValue'}

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for originalId in ids:
            testId = updateDataIdOrDataCord(originalId, **updateVals)
            for k, v in updateVals.items():
                self.assertEqual(testId[k], v)

    def test_fillDataId(self):
        self.assertFalse(_dayobs_present(self.expIdOnly))
        self.assertFalse(_seqnum_present(self.expIdOnly))

        fullId = fillDataId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(fullId))
        self.assertTrue(_seqnum_present(fullId))

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for dataId in ids:
            fullId = fillDataId(self.butler, dataId)
            self.assertTrue(_dayobs_present(fullId))
            self.assertTrue(_seqnum_present(fullId))
            self.assertTrue(_expid_present(fullId))

    def test_getExpRecordFromDataId(self):
        record = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(record, dafButler.dimensions.DimensionRecord)

    def test_getDayObsSeqNumFromExposureId(self):
        dayObsSeqNum = getDayObsSeqNumFromExposureId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(dayObsSeqNum))
        self.assertTrue(_seqnum_present(dayObsSeqNum))

    def test_removeDataProduct(self):
        # Can't think of an easy or safe test for this, so this is
        # deliberately a placeholder that exercises nothing.
        return

    def test_getLatissOnSkyDataIds(self):
        # This is very slow, consider removing as it's the least important of
        # all the util functions. However, restricting it to only the most
        # recent day does help a lot, so probably OK like that, and should
        # speed up with middleware improvements in the future, and we should
        # ensure that they don't break this, so inclined to leave for now
        dayToUse = getDaysWithData(self.butler)[-1]
        # the most recent day with data might only be biases or flats so make
        # sure to override the default of skipping biases, darks & flats
        skipTypes = ()
        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])

        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse,
                                    full=True)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])
        testId = ids[0]
        self.assertTrue(_dayobs_present(testId))
        self.assertTrue(_seqnum_present(testId))
        self.assertTrue(_expid_present(testId))

    def test__assureDict(self):
        for item in [self.rawDataId, self.fullId, self.expIdOnly,
                     self.expRecordNoDetector, self.dataCoordFullView,
                     self.dataCoordMinimal, self.rawDataIdNoDayObSeqNum]:
            testId = _assureDict(item)
            self.assertIsInstance(testId, dict)

    def test__get_dayobs_key(self):
        dataId = {'a_random_key': 321, 'exposure.day_obs': 20200312, 'z_random_key': 'abc'}
        self.assertEqual(_get_dayobs_key(dataId), 'exposure.day_obs')
        dataId = {'day_obs': 20200312}
        self.assertEqual(_get_dayobs_key(dataId), 'day_obs')
        dataId = {'missing': 20200312}
        self.assertIsNone(_get_dayobs_key(dataId))

    def test__get_seqnum_key(self):
        dataId = {'a_random_key': 321, 'exposure.seq_num': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_seqnum_key(dataId), 'exposure.seq_num')
        dataId = {'seq_num': 123}
        self.assertEqual(_get_seqnum_key(dataId), 'seq_num')
        dataId = {'missing': 123}
        self.assertIsNone(_get_seqnum_key(dataId))

    def test__get_expid_key(self):
        dataId = {'a_random_key': 321, 'exposure.id': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_expid_key(dataId), 'exposure.id')
        dataId = {'a_random_key': 321, 'exposure': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_expid_key(dataId), 'exposure')
        dataId = {'missing': 123}
        self.assertIsNone(_get_expid_key(dataId))

374 

375 

class ButlerInitTestCase(lsst.utils.tests.TestCase):
    """Separately test whether we can make a butler with the env var set
    and that the expected error type is raised and passed through when it is
    not, as this is relied upon to correctly skip tests when butler init is
    not possible.
    """

    def test_dafButlerRaiseTypes(self):
        """Check which exception ``dafButler.Butler`` raises for each state
        of the ``DAF_BUTLER_REPOSITORY_INDEX`` environment variable.
        """
        # If DAF_BUTLER_REPOSITORY_INDEX is not set *at all* then
        # using an instrument label raises a FileNotFoundError.
        # patch.dict restores the original environment on exit, and pop with
        # a default removes the variable whether or not it was present.
        with unittest.mock.patch.dict(os.environ):
            os.environ.pop('DAF_BUTLER_REPOSITORY_INDEX', None)
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('LATISS')

        # If DAF_BUTLER_REPOSITORY_INDEX is present but is just an empty
        # string then using a label raises a RuntimeError
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ''}), \
                self.assertRaises(RuntimeError):
            dafButler.Butler('LATISS')

        # If DAF_BUTLER_REPOSITORY_INDEX _is_ set, we can't rely on any given
        # camera existing, but we can check that we get the expected error
        # when trying to init an instrument which definitely won't be defined.
        if os.getenv('DAF_BUTLER_REPOSITORY_INDEX'):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('NotAValidCameraName')

    def test_makeDefaultLatissButlerRaiseTypes(self):
        """makeDefaultLatissButler unifies the mixed exception types from
        butler inits, so test all available possibilities here.
        """
        # variable not set at all
        with unittest.mock.patch.dict(os.environ):
            os.environ.pop('DAF_BUTLER_REPOSITORY_INDEX', None)
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable set, either to an empty string or to a path that does not
        # exist; both are expected to surface as FileNotFoundError
        fakeFile = '/path/to/a/file/which/does/not_exist.yaml'
        for indexValue in ('', fakeFile):
            with self.subTest(DAF_BUTLER_REPOSITORY_INDEX=indexValue):
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": indexValue}), \
                        self.assertRaises(FileNotFoundError):
                    makeDefaultLatissButler()

    def test_DAF_BUTLER_REPOSITORY_INDEX_value(self):
        # If DAF_BUTLER_REPOSITORY_INDEX is truthy then we expect it to point
        # to an actual file
        repoFile = os.getenv('DAF_BUTLER_REPOSITORY_INDEX')
        if repoFile:
            self.assertTrue(ResourcePath(repoFile).exists())

423 

424 def test_DAF_BUTLER_REPOSITORY_INDEX_value(self): 

425 # If DAF_BUTLER_REPOSITORY_INDEX is truthy then we expect it to point 

426 # to an actual file 

427 repoFile = os.getenv('DAF_BUTLER_REPOSITORY_INDEX') 

428 if repoFile: 

429 self.assertTrue(ResourcePath(repoFile).exists()) 

430 

431 

class TestMemory(lsst.utils.tests.MemoryTestCase):
    """Inherit the checks of `lsst.utils.tests.MemoryTestCase` unchanged.

    No tests are added or overridden here.
    """

434 

435 

def setup_module(module):
    """Run ``lsst.utils.tests.init()`` as a module-level setup hook.

    Parameters
    ----------
    module : `object`
        Unused; accepted so that the hook can be called with the module
        being set up (presumably by the test runner - confirm).
    """
    lsst.utils.tests.init()

438 

439 

# Direct execution as a script: perform the same initialisation that
# setup_module() does, then hand over to the unittest runner.
if "__main__" == __name__:
    lsst.utils.tests.init()
    unittest.main()