Coverage for tests/test_butlerUtils.py: 14%

273 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-15 03:19 -0800

1# This file is part of summit_utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import os 

23import unittest 

24from typing import Iterable 

25import datetime 

26import random 

27 

28import lsst.utils.tests 

29from lsst.summit.utils.butlerUtils import (makeDefaultLatissButler, 

30 sanitize_day_obs, 

31 getMostRecentDayObs, 

32 getSeqNumsForDayObs, 

33 getMostRecentDataId, 

34 getDatasetRefForDataId, 

35 _dayobs_present, 

36 _seqnum_present, 

37 _expid_present, 

38 _get_dayobs_key, 

39 _get_seqnum_key, 

40 _get_expid_key, 

41 getDayObs, 

42 getSeqNum, 

43 getExpId, 

44 datasetExists, 

45 sortRecordsByDayObsThenSeqNum, 

46 getDaysWithData, 

47 getExpIdFromDayObsSeqNum, 

48 updateDataIdOrDataCord, 

49 fillDataId, 

50 getExpRecordFromDataId, 

51 getDayObsSeqNumFromExposureId, 

52 getLatissOnSkyDataIds, 

53 _assureDict, 

54 getLatissDefaultCollections, 

55 RECENT_DAY, 

56 ) 

57from lsst.summit.utils.butlerUtils import removeDataProduct # noqa: F401 

58import lsst.daf.butler as dafButler 

59from lsst.resources import ResourcePath 

60 

61 

class ButlerUtilsTestCase(lsst.utils.tests.TestCase):
    """Tests for the helper functions imported from
    lsst.summit.utils.butlerUtils.

    setUp() builds a butler with makeDefaultLatissButler(); if that raises
    FileNotFoundError the test is skipped rather than failed.
    """

    def setUp(self):
        """Create the butler and the various dataId flavours the tests use.

        Raises
        ------
        unittest.SkipTest
            Raised if makeDefaultLatissButler() raises FileNotFoundError.
        """
        # this also functions as test_makeDefaultLatissButler(), but we may as
        # well catch the butler once it's made so it can be reused if needed,
        # given how hard it is to make it robustly

        # butler stuff
        try:
            self.butler = makeDefaultLatissButler()
        except FileNotFoundError:
            raise unittest.SkipTest("Skipping tests that require the LATISS butler repo.")
        self.assertIsInstance(self.butler, dafButler.Butler)

        # dict-like dataIds
        self.rawDataId = getMostRecentDataId(self.butler)
        self.fullId = fillDataId(self.butler, self.rawDataId)
        self.assertIn('exposure', self.fullId)
        self.assertIn('day_obs', self.fullId)
        self.assertIn('seq_num', self.fullId)
        self.expIdOnly = {'exposure': self.fullId['exposure'], 'detector': 0}
        self.dayObsSeqNumIdOnly = {'day_obs': getDayObs(self.fullId), 'seq_num': getSeqNum(self.fullId),
                                   'detector': 0}

        # expRecords
        self.expRecordNoDetector = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(self.expRecordNoDetector, dafButler.dimensions.DimensionRecord)
        self.assertFalse(hasattr(self.expRecordNoDetector, 'detector'))
        # just a crosscheck on the above to make sure other things are correct
        self.assertTrue(hasattr(self.expRecordNoDetector, 'instrument'))

        # data coordinates
        # popping here because butler.registry.expandDataId cannot have
        # day_obs or seq_num present right now
        rawDataIdNoDayObSeqNum = _assureDict(self.rawDataId)
        if dayObsKey := _get_dayobs_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(dayObsKey)
        if seqNumKey := _get_seqnum_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(seqNumKey)
        self.rawDataIdNoDayObSeqNum = rawDataIdNoDayObSeqNum
        self.dataCoordMinimal = self.butler.registry.expandDataId(self.rawDataIdNoDayObSeqNum, detector=0)
        self.dataCoordFullView = self.butler.registry.expandDataId(self.rawDataIdNoDayObSeqNum,
                                                                   detector=0).full
        self.assertIsInstance(self.dataCoordMinimal, dafButler.dimensions.DataCoordinate)
        # NB the type check below is currently using a non-public API, but
        # at present there isn't a good alternative
        viewType = dafButler.core.dimensions._coordinate._DataCoordinateFullView
        self.assertIsInstance(self.dataCoordFullView, viewType)

    def test_getLatissDefaultCollections(self):
        """The default collections must be a non-empty, non-None container."""
        defaultCollections = getLatissDefaultCollections()
        self.assertIsNotNone(defaultCollections)
        self.assertNotEqual(defaultCollections, [])
        self.assertGreaterEqual(len(defaultCollections), 1)

    def test_RECENT_DAY(self):
        """RECENT_DAY must be a plausible past date that has data after it."""
        todayInt = int(datetime.date.today().strftime("%Y%m%d"))
        self.assertLessEqual(RECENT_DAY, todayInt)  # in the past
        self.assertGreaterEqual(RECENT_DAY, 20200101)  # not too far in the past

        # check that the value of RECENT_DAY is before the end of the data.
        daysWithData = getDaysWithData(self.butler)
        self.assertLessEqual(RECENT_DAY, max(daysWithData))

        # no test here, but print a warning if it hasn't been updated recently
        recentDay_datetime = datetime.datetime.strptime(str(RECENT_DAY), "%Y%m%d")
        now = datetime.datetime.today()
        timeSinceUpdate = now - recentDay_datetime
        if timeSinceUpdate.days > 100:
            print(f"RECENT_DAY is now {timeSinceUpdate.days} days in the past. "
                  "You might want to consider updating this to speed up butler queries.")

    def test_sanitize_day_obs(self):
        """Valid inputs are converted to ints; invalid ones raise ValueError."""
        dayObs = '2020-01-02'
        self.assertEqual(sanitize_day_obs(dayObs), 20200102)
        dayObs = 20210201
        self.assertEqual(sanitize_day_obs(dayObs), dayObs)

        # Each invalid input needs its own assertRaises context: once one call
        # raises, nothing after it inside the same ``with`` block is executed,
        # so sharing a block would leave the later inputs untested.
        for badValue in (1.234, 'Febuary 29th, 1970'):
            with self.assertRaises(ValueError):
                sanitize_day_obs(badValue)

    def test_getMostRecentDayObs(self):
        """The most recent dayObs is an int within a plausible range."""
        # just a basic sanity check here as we can't know the value,
        # but at least check something is returned, and is plausible
        recentDay = getMostRecentDayObs(self.butler)
        self.assertIsInstance(recentDay, int)
        self.assertGreaterEqual(recentDay, RECENT_DAY)
        # some test data might be set a millennium in the future, i.e.
        # the year would be 2XXX+1000, so set to y4k just in case
        self.assertLess(recentDay, 40000000)

    def test_getSeqNumsForDayObs(self):
        """A day without data gives no seqNums; the latest day gives some."""
        emptyDay = 19990101
        seqnums = getSeqNumsForDayObs(self.butler, emptyDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertEqual(len(list(seqnums)), 0)

        recentDay = getMostRecentDayObs(self.butler)
        seqnums = getSeqNumsForDayObs(self.butler, recentDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertGreaterEqual(len(list(seqnums)), 1)

    def test_getMostRecentDataId(self):
        """The most recent dataId is a dict carrying the expected keys."""
        # we can't know the values, but it should always return something
        # and the dict should always have certain keys
        dataId = getMostRecentDataId(self.butler)
        self.assertIsInstance(dataId, dict)
        self.assertIn('day_obs', dataId)
        self.assertIn('seq_num', dataId)
        self.assertTrue('exposure' in dataId or 'exposure.id' in dataId)

    def test_getDatasetRefForDataId(self):
        """A raw dataId resolves to a DatasetRef."""
        dRef = getDatasetRefForDataId(self.butler, 'raw', self.rawDataId)
        # use the package-level name via the existing ``dafButler`` alias
        # rather than reaching through the internal ``core.datasets.ref`` path
        self.assertIsInstance(dRef, dafButler.DatasetRef)

    def test__dayobs_present(self):
        """_dayobs_present is True only when a day_obs-style key exists."""
        goods = [{'day_obs': 123}, {'exposure.day_obs': 234}, {'day_obs': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_dayobs_present(good))
        for bad in bads:
            self.assertFalse(_dayobs_present(bad))

    def test__seqnum_present(self):
        """_seqnum_present is True only when a seq_num-style key exists."""
        goods = [{'seq_num': 123}, {'exposure.seq_num': 234}, {'seq_num': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_seqnum_present(good))
        for bad in bads:
            self.assertFalse(_seqnum_present(bad))

    def test__expid_present(self):
        """_expid_present is True only when an exposure-id-style key exists."""
        goods = [{'exposure': 123}, {'exposure.id': 234}, {'exposure.id': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_expid_present(good))
        for bad in bads:
            self.assertFalse(_expid_present(bad))

    def test_getDayObs(self):
        """getDayObs returns the stored value, or None if the key is absent."""
        dayVal = 98765
        goods = [{'day_obs': dayVal}, {'exposure.day_obs': dayVal}, {'day_obs': dayVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getDayObs(good), dayVal)
        for bad in bads:
            self.assertIsNone(getDayObs(bad))

    def test_getSeqNum(self):
        """getSeqNum returns the stored value, or None if the key is absent."""
        seqVal = 12345
        goods = [{'seq_num': seqVal}, {'exposure.seq_num': seqVal}, {'seq_num': seqVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getSeqNum(good), seqVal)
        for bad in bads:
            self.assertIsNone(getSeqNum(bad))

    def test_getExpId(self):
        """getExpId returns the stored value, or None if the key is absent."""
        expIdVal = 12345
        goods = [{'exposure': expIdVal}, {'exposure.id': expIdVal}, {'exposure': expIdVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getExpId(good), expIdVal)
        for bad in bads:
            self.assertIsNone(getExpId(bad))

    def test_datasetExists(self):
        """The raw dataset is found for every dataId flavour."""
        self.assertTrue(datasetExists(self.butler, 'raw', self.rawDataId))
        self.assertTrue(datasetExists(self.butler, 'raw', self.expIdOnly))
        self.assertTrue(datasetExists(self.butler, 'raw', self.dayObsSeqNumIdOnly))

    def test_sortRecordsByDayObsThenSeqNum(self):
        """Shuffled records come back ordered; duplicates raise ValueError."""
        where = "exposure.day_obs=day_obs"
        expRecords = self.butler.registry.queryDimensionRecords("exposure", where=where,
                                                                bind={'day_obs': RECENT_DAY})
        expRecords = list(expRecords)
        self.assertGreaterEqual(len(expRecords), 1)  # just ensure we're not doing a no-op test
        random.shuffle(expRecords)  # they are often already in order, so make sure they're not
        sortedIds = sortRecordsByDayObsThenSeqNum(expRecords)
        for earlier, later in zip(sortedIds[:-1], sortedIds[1:]):
            self.assertLess(earlier.seq_num, later.seq_num)

        # Check that ambiguous sorts raise as expected. The query and the
        # sanity check stay outside the assertRaises block so that only a
        # ValueError coming from the sort itself can satisfy it.
        expRecords = self.butler.registry.queryDimensionRecords("exposure", where=where,
                                                                bind={'day_obs': RECENT_DAY})
        expRecords = list(expRecords)
        self.assertGreaterEqual(len(expRecords), 1)  # just ensure we're not doing a no-op test
        expRecords.append(expRecords[0])  # add a duplicate
        with self.assertRaises(ValueError):
            sortRecordsByDayObsThenSeqNum(expRecords)

    def test_getDaysWithData(self):
        """At least one day with data is returned, and days are ints."""
        days = getDaysWithData(self.butler)
        # must be strictly positive: ``>= 0`` can never fail, and days[0]
        # below needs at least one entry
        self.assertGreater(len(days), 0)
        self.assertIsInstance(days[0], int)

    def test_getExpIdFromDayObsSeqNum(self):
        """A dayObs/seqNum dataId yields a result carrying an exposure id."""
        expId = getExpIdFromDayObsSeqNum(self.butler, self.dayObsSeqNumIdOnly)
        self.assertTrue(_expid_present(expId))

    def test_updateDataIdOrDataCord(self):
        """Extra key/value pairs are present after updating any id flavour."""
        updateVals = {'testKey': 'testValue'}

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for originalId in ids:
            testId = updateDataIdOrDataCord(originalId, **updateVals)
            for k, v in updateVals.items():
                self.assertEqual(testId[k], v)

    def test_fillDataId(self):
        """fillDataId adds the dayObs, seqNum and expId to any id flavour."""
        self.assertFalse(_dayobs_present(self.expIdOnly))
        self.assertFalse(_seqnum_present(self.expIdOnly))

        fullId = fillDataId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(fullId))
        self.assertTrue(_seqnum_present(fullId))

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for dataId in ids:
            fullId = fillDataId(self.butler, dataId)
            self.assertTrue(_dayobs_present(fullId))
            self.assertTrue(_seqnum_present(fullId))
            self.assertTrue(_expid_present(fullId))

    def test_getExpRecordFromDataId(self):
        """A raw dataId resolves to a DimensionRecord."""
        record = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(record, dafButler.dimensions.DimensionRecord)

    def test_getDayObsSeqNumFromExposureId(self):
        """An exposure-id-only dataId yields both dayObs and seqNum."""
        dayObsSeqNum = getDayObsSeqNumFromExposureId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(dayObsSeqNum))
        self.assertTrue(_seqnum_present(dayObsSeqNum))

    def test_removeDataProduct(self):
        """Placeholder only: removeDataProduct is not exercised here."""
        # Can't think of an easy or safe test for this

    def test_getLatissOnSkyDataIds(self):
        """On-sky dataIds are returned for the most recent day with data."""
        # This is very slow, consider removing as it's the least important of
        # all the util functions. However, restricting it to only the most
        # recent day does help a lot, so probably OK like that, and should
        # speed up with middleware improvements in the future, and we should
        # ensure that they don't break this, so inclined to leave for now
        dayToUse = getDaysWithData(self.butler)[-1]
        # the most recent day with data might only be biases or flats so make
        # sure to override the default of skipping biases, darks & flats
        skipTypes = ()
        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])

        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse,
                                    full=True)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])
        testId = ids[0]
        self.assertTrue(_dayobs_present(testId))
        self.assertTrue(_seqnum_present(testId))
        self.assertTrue(_expid_present(testId))

    def test__assureDict(self):
        """Every id flavour built in setUp() converts to a plain dict."""
        for item in [self.rawDataId, self.fullId, self.expIdOnly,
                     self.expRecordNoDetector, self.dataCoordFullView,
                     self.dataCoordMinimal, self.rawDataIdNoDayObSeqNum]:
            testId = _assureDict(item)
            self.assertIsInstance(testId, dict)

    def test__get_dayobs_key(self):
        """The matching day_obs key name is returned, or None if absent."""
        dataId = {'a_random_key': 321, 'exposure.day_obs': 20200312, 'z_random_key': 'abc'}
        self.assertEqual(_get_dayobs_key(dataId), 'exposure.day_obs')
        dataId = {'day_obs': 20200312}
        self.assertEqual(_get_dayobs_key(dataId), 'day_obs')
        dataId = {'missing': 20200312}
        self.assertIsNone(_get_dayobs_key(dataId))

    def test__get_seqnum_key(self):
        """The matching seq_num key name is returned, or None if absent."""
        dataId = {'a_random_key': 321, 'exposure.seq_num': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_seqnum_key(dataId), 'exposure.seq_num')
        dataId = {'seq_num': 123}
        self.assertEqual(_get_seqnum_key(dataId), 'seq_num')
        dataId = {'missing': 123}
        self.assertIsNone(_get_seqnum_key(dataId))

    def test__get_expid_key(self):
        """The matching exposure key name is returned, or None if absent."""
        dataId = {'a_random_key': 321, 'exposure.id': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_expid_key(dataId), 'exposure.id')
        dataId = {'a_random_key': 321, 'exposure': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_expid_key(dataId), 'exposure')
        dataId = {'missing': 123}
        self.assertIsNone(_get_expid_key(dataId))

367 

368 

class ButlerInitTestCase(lsst.utils.tests.TestCase):
    """Separately test whether we can make a butler with the env var set
    and that the expected error type is raised and passed through when it is
    not, as this is relied upon to correctly skip tests when butler init is
    not possible.
    """

    def test_dafButlerRaiseTypes(self):
        """Check which exception dafButler.Butler raises for each state of
        the DAF_BUTLER_REPOSITORY_INDEX environment variable.
        """
        # Imported explicitly: ``import unittest`` on its own does not load
        # the ``unittest.mock`` submodule, so relying on the attribute only
        # works when some other module happens to have imported it first.
        from unittest import mock

        # If DAF_BUTLER_REPOSITORY_INDEX is not set *at all* then
        # using an instrument label raises a FileNotFoundError
        with mock.patch.dict('os.environ'):
            # pop with a default is a no-op when the variable was never set
            os.environ.pop('DAF_BUTLER_REPOSITORY_INDEX', None)
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('LATISS')

        # If DAF_BUTLER_REPOSITORY_INDEX is present but is just an empty
        # string then using a label raises a RuntimeError
        with mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ''}):
            with self.assertRaises(RuntimeError):
                dafButler.Butler('LATISS')

        # If DAF_BUTLER_REPOSITORY_INDEX _is_ set, we can't rely on any given
        # camera existing, but we can check that we get the expected error
        # when trying to init an instrument which definitely won't be defined.
        if os.getenv('DAF_BUTLER_REPOSITORY_INDEX'):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('NotAValidCameraName')

    def test_makeDefaultLatissButlerRaiseTypes(self):
        """makeDefaultLatissButler unifies the mixed exception types from
        butler inits, so test all available possibilities here.
        """
        # Imported explicitly: ``import unittest`` on its own does not load
        # the ``unittest.mock`` submodule.
        from unittest import mock

        # variable not set at all
        with mock.patch.dict('os.environ'):
            # pop with a default is a no-op when the variable was never set
            os.environ.pop('DAF_BUTLER_REPOSITORY_INDEX', None)
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable set to an empty string
        with mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ''}):
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable pointing at a file that does not exist
        fakeFile = '/path/to/a/file/which/does/not_exist.yaml'
        with mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": fakeFile}):
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

    def test_DAF_BUTLER_REPOSITORY_INDEX_value(self):
        """If the environment variable is set, it must point at a real file."""
        # If DAF_BUTLER_REPOSITORY_INDEX is truthy then we expect it to point
        # to an actual file
        repoFile = os.getenv('DAF_BUTLER_REPOSITORY_INDEX')
        if repoFile:
            self.assertTrue(ResourcePath(repoFile).exists())

423 

424 

class TestMemory(lsst.utils.tests.MemoryTestCase):
    """Run the checks inherited from lsst.utils.tests.MemoryTestCase.

    Nothing is added or overridden here; the subclass exists so that the
    inherited tests are collected as part of this module.
    """

427 

428 

def setup_module(module):
    """Module-level setup hook: initialise the lsst.utils.tests machinery.

    Parameters
    ----------
    module : module
        The module being set up; accepted to satisfy the hook signature but
        not used.
    """
    lsst.utils.tests.init()

431 

432 

if __name__ == "__main__":
    # Direct execution (rather than collection by a test runner): do the same
    # initialisation as setup_module() and then hand over to unittest.
    lsst.utils.tests.init()
    unittest.main()