Coverage for tests/test_butlerUtils.py: 14%

294 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-03-31 04:33 -0700

1# This file is part of summit_utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

import os
import unittest
import unittest.mock
from typing import Iterable
import datetime
import random
import copy

import lsst.utils.tests
from lsst.summit.utils.butlerUtils import (makeDefaultLatissButler,
                                           updateDataId,
                                           sanitize_day_obs,
                                           getMostRecentDayObs,
                                           getSeqNumsForDayObs,
                                           getMostRecentDataId,
                                           getDatasetRefForDataId,
                                           _dayobs_present,
                                           _seqnum_present,
                                           _expid_present,
                                           _get_dayobs_key,
                                           _get_seqnum_key,
                                           _get_expid_key,
                                           getDayObs,
                                           getSeqNum,
                                           getExpId,
                                           datasetExists,
                                           sortRecordsByDayObsThenSeqNum,
                                           getDaysWithData,
                                           getExpIdFromDayObsSeqNum,
                                           updateDataIdOrDataCord,
                                           fillDataId,
                                           getExpRecordFromDataId,
                                           getDayObsSeqNumFromExposureId,
                                           getLatissOnSkyDataIds,
                                           _assureDict,
                                           getLatissDefaultCollections,
                                           RECENT_DAY,
                                           )
from lsst.summit.utils.butlerUtils import removeDataProduct  # noqa: F401
import lsst.daf.butler as dafButler
from lsst.resources import ResourcePath

62 

63 

class ButlerUtilsTestCase(lsst.utils.tests.TestCase):
    """Tests for the helper functions in lsst.summit.utils.butlerUtils.

    setUp() builds a LATISS butler and a set of equivalent data identifiers
    in several forms (dicts, an exposure record, data coordinates) which the
    individual tests reuse. If the butler cannot be created because the repo
    is not available, the tests are skipped.
    """

    def setUp(self):
        # this also functions as test_makeDefaultLatissButler(), but we may as
        # well catch the butler once it's made so it can be reused if needed,
        # given how hard it is to make it robustly

        # butler stuff
        try:
            self.butler = makeDefaultLatissButler()
        except FileNotFoundError:
            raise unittest.SkipTest("Skipping tests that require the LATISS butler repo.")
        self.assertIsInstance(self.butler, dafButler.Butler)

        # dict-like dataIds
        self.rawDataId = getMostRecentDataId(self.butler)
        self.fullId = fillDataId(self.butler, self.rawDataId)
        self.assertIn('exposure', self.fullId)
        self.assertIn('day_obs', self.fullId)
        self.assertIn('seq_num', self.fullId)
        self.expIdOnly = {'exposure': self.fullId['exposure'], 'detector': 0}
        self.dayObsSeqNumIdOnly = {'day_obs': getDayObs(self.fullId), 'seq_num': getSeqNum(self.fullId),
                                   'detector': 0}

        # expRecords
        self.expRecordNoDetector = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(self.expRecordNoDetector, dafButler.dimensions.DimensionRecord)
        self.assertFalse(hasattr(self.expRecordNoDetector, 'detector'))
        self.assertNotIn('detector', self.expRecordNoDetector.dataId)
        # just a crosscheck on the above to make sure other things are correct
        self.assertTrue(hasattr(self.expRecordNoDetector, 'instrument'))

        # data coordinates
        # popping here because butler.registry.expandDataId cannot have
        # day_obs or seq_num present right now
        rawDataIdNoDayObSeqNum = _assureDict(self.rawDataId)
        if dayObsKey := _get_dayobs_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(dayObsKey)
        if seqNumKey := _get_seqnum_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(seqNumKey)
        self.rawDataIdNoDayObSeqNum = rawDataIdNoDayObSeqNum
        # expand once and derive the full view from the same coordinate,
        # rather than querying the registry twice with identical arguments
        expandedCoord = self.butler.registry.expandDataId(self.rawDataIdNoDayObSeqNum, detector=0)
        self.dataCoordMinimal = expandedCoord
        self.dataCoordFullView = expandedCoord.full
        self.assertIsInstance(self.dataCoordMinimal, dafButler.dimensions.DataCoordinate)
        # NB the type check below is currently using a non-public API, but
        # at present there isn't a good alternative
        viewType = dafButler.core.dimensions._coordinate._DataCoordinateFullView
        self.assertIsInstance(self.dataCoordFullView, viewType)

    def test_getLatissDefaultCollections(self):
        defaultCollections = getLatissDefaultCollections()
        self.assertIsNotNone(defaultCollections)
        self.assertNotEqual(defaultCollections, [])
        self.assertGreaterEqual(len(defaultCollections), 1)

    def test_RECENT_DAY(self):
        todayInt = int(datetime.date.today().strftime("%Y%m%d"))
        self.assertLessEqual(RECENT_DAY, todayInt)  # in the past
        self.assertGreaterEqual(RECENT_DAY, 20200101)  # not too far in the past

        # check that the value of RECENT_DAY is before the end of the data.
        daysWithData = getDaysWithData(self.butler)
        self.assertLessEqual(RECENT_DAY, max(daysWithData))

        # no test here, but print a warning if it hasn't been updated recently
        recentDay_datetime = datetime.datetime.strptime(str(RECENT_DAY), "%Y%m%d")
        now = datetime.datetime.today()
        timeSinceUpdate = now - recentDay_datetime
        if timeSinceUpdate.days > 100:
            print(f"RECENT_DAY is now {timeSinceUpdate.days} days in the past. "
                  "You might want to consider updating this to speed up butler queries.")

    def test_sanitize_day_obs(self):
        dayObs = '2020-01-02'
        self.assertEqual(sanitize_day_obs(dayObs), 20200102)
        dayObs = 20210201
        self.assertEqual(sanitize_day_obs(dayObs), dayObs)

        # Each bad input gets its own assertRaises context: a raise ends the
        # with-block, so two calls sharing one context would leave the second
        # one unexecuted and therefore untested.
        for badDayObs in (1.234, 'Febuary 29th, 1970'):
            with self.subTest(dayObs=badDayObs):
                with self.assertRaises(ValueError):
                    sanitize_day_obs(badDayObs)

    def test_getMostRecentDayObs(self):
        # just a basic sanity check here as we can't know the value,
        # but at least check something is returned, and is plausible
        recentDay = getMostRecentDayObs(self.butler)
        self.assertIsInstance(recentDay, int)
        self.assertGreaterEqual(recentDay, RECENT_DAY)
        # some test data might be set a millennium in the future, i.e.
        # the year would be 2XXX+1000, so set to y4k just in case
        self.assertLess(recentDay, 40000000)

    def test_getSeqNumsForDayObs(self):
        emptyDay = 19990101
        seqnums = getSeqNumsForDayObs(self.butler, emptyDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertEqual(len(list(seqnums)), 0)

        recentDay = getMostRecentDayObs(self.butler)
        seqnums = getSeqNumsForDayObs(self.butler, recentDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertGreaterEqual(len(list(seqnums)), 1)

    def test_getMostRecentDataId(self):
        # we can't know the values, but it should always return something
        # and the dict and int forms should always have certain keys and agree
        dataId = getMostRecentDataId(self.butler)
        self.assertIsInstance(dataId, dict)
        self.assertIn('day_obs', dataId)
        self.assertIn('seq_num', dataId)
        self.assertTrue('exposure' in dataId or 'exposure.id' in dataId)

    def test_getDatasetRefForDataId(self):
        # every supported dataId flavour should resolve to a DatasetRef; the
        # class is checked via the public lsst.daf.butler namespace
        dataIds = [self.rawDataId, self.rawDataIdNoDayObSeqNum,
                   self.dataCoordMinimal, self.dataCoordFullView]
        for dataId in dataIds:
            dRef = getDatasetRefForDataId(self.butler, 'raw', dataId)
            self.assertIsInstance(dRef, dafButler.DatasetRef)

    def test__dayobs_present(self):
        goods = [{'day_obs': 123}, {'exposure.day_obs': 234}, {'day_obs': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_dayobs_present(good))
        for bad in bads:
            self.assertFalse(_dayobs_present(bad))

    def test__seqnum_present(self):
        goods = [{'seq_num': 123}, {'exposure.seq_num': 234}, {'seq_num': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_seqnum_present(good))
        for bad in bads:
            self.assertFalse(_seqnum_present(bad))

    def test__expid_present(self):
        goods = [{'exposure': 123}, {'exposure.id': 234}, {'exposure.id': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_expid_present(good))
        for bad in bads:
            self.assertFalse(_expid_present(bad))

    def test_getDayObs(self):
        dayVal = 98765
        goods = [{'day_obs': dayVal}, {'exposure.day_obs': dayVal}, {'day_obs': dayVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getDayObs(good), dayVal)
        for bad in bads:
            self.assertIsNone(getDayObs(bad))

    def test_getSeqNum(self):
        seqVal = 12345
        goods = [{'seq_num': seqVal}, {'exposure.seq_num': seqVal}, {'seq_num': seqVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getSeqNum(good), seqVal)
        for bad in bads:
            self.assertIsNone(getSeqNum(bad))

    def test_getExpId(self):
        expIdVal = 12345
        goods = [{'exposure': expIdVal}, {'exposure.id': expIdVal}, {'exposure': expIdVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getExpId(good), expIdVal)
        for bad in bads:
            self.assertIsNone(getExpId(bad))

    def test_datasetExists(self):
        self.assertTrue(datasetExists(self.butler, 'raw', self.rawDataId))
        self.assertTrue(datasetExists(self.butler, 'raw', self.expIdOnly))
        self.assertTrue(datasetExists(self.butler, 'raw', self.dayObsSeqNumIdOnly))

    def test_sortRecordsByDayObsThenSeqNum(self):
        where = "exposure.day_obs=day_obs"
        expRecords = self.butler.registry.queryDimensionRecords("exposure", where=where,
                                                                bind={'day_obs': RECENT_DAY})
        expRecords = list(expRecords)
        self.assertGreaterEqual(len(expRecords), 1)  # just ensure we're not doing a no-op test
        random.shuffle(expRecords)  # they are often already in order, so make sure they're not
        sortedIds = sortRecordsByDayObsThenSeqNum(expRecords)
        for earlier, later in zip(sortedIds, sortedIds[1:]):
            self.assertLess(earlier.seq_num, later.seq_num)

        # Check that ambiguous sorts raise as expected. Only the sort itself
        # sits inside the assertRaises context so that a ValueError coming
        # from anywhere else cannot make this pass by accident.
        withDuplicate = expRecords + [expRecords[0]]  # add a duplicate
        with self.assertRaises(ValueError):
            sortRecordsByDayObsThenSeqNum(withDuplicate)

    def test_getDaysWithData(self):
        days = getDaysWithData(self.butler)
        # must be strictly positive: a ">= 0" check can never fail, and the
        # indexing below needs at least one entry
        self.assertGreater(len(days), 0)
        self.assertIsInstance(days[0], int)

    def test_getExpIdFromDayObsSeqNum(self):
        expId = getExpIdFromDayObsSeqNum(self.butler, self.dayObsSeqNumIdOnly)
        self.assertTrue(_expid_present(expId))

    def test_updateDataIdOrDataCord(self):
        updateVals = {'testKey': 'testValue'}

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for originalId in ids:
            testId = updateDataIdOrDataCord(originalId, **updateVals)
            for k, v in updateVals.items():
                self.assertEqual(testId[k], v)

    def test_fillDataId(self):
        self.assertFalse(_dayobs_present(self.expIdOnly))
        self.assertFalse(_seqnum_present(self.expIdOnly))

        fullId = fillDataId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(fullId))
        self.assertTrue(_seqnum_present(fullId))

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for dataId in ids:
            fullId = fillDataId(self.butler, dataId)
            self.assertTrue(_dayobs_present(fullId))
            self.assertTrue(_seqnum_present(fullId))
            self.assertTrue(_expid_present(fullId))

    def test_getExpRecordFromDataId(self):
        record = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(record, dafButler.dimensions.DimensionRecord)

    def test_getDayObsSeqNumFromExposureId(self):
        dayObsSeqNum = getDayObsSeqNumFromExposureId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(dayObsSeqNum))
        self.assertTrue(_seqnum_present(dayObsSeqNum))

    def test_removeDataProduct(self):
        # Can't think of an easy or safe test for this
        pass

    def test_getLatissOnSkyDataIds(self):
        # This is very slow, consider removing as it's the least important of
        # all the util functions. However, restricting it to only the most
        # recent day does help a lot, so probably OK like that, and should
        # speed up with middleware improvements in the future, and we should
        # ensure that they don't break this, so inclined to leave for now
        dayToUse = getDaysWithData(self.butler)[-1]
        # the most recent day with data might only be biases or flats so make
        # sure to override the default of skipping biases, darks & flats
        skipTypes = ()
        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])

        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse,
                                    full=True)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])
        testId = ids[0]
        self.assertTrue(_dayobs_present(testId))
        self.assertTrue(_seqnum_present(testId))
        self.assertTrue(_expid_present(testId))

    def test__assureDict(self):
        for item in [self.rawDataId, self.fullId, self.expIdOnly,
                     self.expRecordNoDetector, self.dataCoordFullView,
                     self.dataCoordMinimal, self.rawDataIdNoDayObSeqNum]:
            testId = _assureDict(item)
            self.assertIsInstance(testId, dict)

    def test__get_dayobs_key(self):
        dataId = {'a_random_key': 321, 'exposure.day_obs': 20200312, 'z_random_key': 'abc'}
        self.assertEqual(_get_dayobs_key(dataId), 'exposure.day_obs')
        dataId = {'day_obs': 20200312}
        self.assertEqual(_get_dayobs_key(dataId), 'day_obs')
        dataId = {'missing': 20200312}
        self.assertIsNone(_get_dayobs_key(dataId))

    def test__get_seqnum_key(self):
        dataId = {'a_random_key': 321, 'exposure.seq_num': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_seqnum_key(dataId), 'exposure.seq_num')
        dataId = {'seq_num': 123}
        self.assertEqual(_get_seqnum_key(dataId), 'seq_num')
        dataId = {'missing': 123}
        self.assertIsNone(_get_seqnum_key(dataId))

    def test__get_expid_key(self):
        dataId = {'a_random_key': 321, 'exposure.id': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_expid_key(dataId), 'exposure.id')
        dataId = {'a_random_key': 321, 'exposure': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_expid_key(dataId), 'exposure')
        dataId = {'missing': 123}
        self.assertIsNone(_get_expid_key(dataId))

    def test_updateDataId(self):
        # check with a dataCoordinate
        dataId = copy.copy(self.expRecordNoDetector.dataId)
        self.assertNotIn('detector', dataId)
        dataId = updateDataId(dataId, detector=123)
        self.assertIn('detector', dataId)
        self.assertEqual(dataId['detector'], 123)

        # check with a dict
        self.assertIsInstance(self.rawDataId, dict)
        dataId = copy.copy(self.rawDataId)
        dataId.pop('detector')
        self.assertNotIn('detector', dataId)
        dataId = updateDataId(dataId, detector=321)
        self.assertIn('detector', dataId)
        self.assertEqual(dataId['detector'], 321)

394 

395 

class ButlerInitTestCase(lsst.utils.tests.TestCase):
    """Separately test whether we can make a butler with the env var set
    and that the expected error type is raised and passed through when it is
    not, as this is relied upon to correctly skip tests when butler init is
    not possible.

    The environment is only ever modified inside ``unittest.mock.patch.dict``
    contexts, so every change is undone when the context exits.
    """

    def test_dafButlerRaiseTypes(self):
        # If DAF_BUTLER_REPOSITORY_INDEX is not set *at all* then
        # using an instrument label raises a FileNotFoundError
        with unittest.mock.patch.dict(os.environ):
            # pop with a default: there is nothing to remove if it isn't set
            os.environ.pop('DAF_BUTLER_REPOSITORY_INDEX', None)
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('LATISS')

        # If DAF_BUTLER_REPOSITORY_INDEX is present but is just an empty
        # string then using a label raises a RuntimeError
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ''}):
            with self.assertRaises(RuntimeError):
                dafButler.Butler('LATISS')

        # If DAF_BUTLER_REPOSITORY_INDEX _is_ set, we can't rely on any given
        # camera existing, but we can check that we get the expected error
        # when trying to init an instrument which definitely won't be defined.
        if os.getenv('DAF_BUTLER_REPOSITORY_INDEX'):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('NotAValidCameraName')

    def test_makeDefaultLatissButlerRaiseTypes(self):
        """makeDefaultLatissButler unifies the mixed exception types from
        butler inits, so test all available possibilities here.
        """
        # variable not set at all
        with unittest.mock.patch.dict(os.environ):
            os.environ.pop('DAF_BUTLER_REPOSITORY_INDEX', None)
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable set to an empty string
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ''}):
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable pointing at a file which does not exist
        fakeFile = '/path/to/a/file/which/does/not_exist.yaml'
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": fakeFile}):
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

    def test_DAF_BUTLER_REPOSITORY_INDEX_value(self):
        # If DAF_BUTLER_REPOSITORY_INDEX is truthy then we expect it to point
        # to an actual file
        repoFile = os.getenv('DAF_BUTLER_REPOSITORY_INDEX')
        if repoFile:
            self.assertTrue(ResourcePath(repoFile).exists())

450 

451 

class TestMemory(lsst.utils.tests.MemoryTestCase):
    """Inherit the checks defined by lsst.utils.tests.MemoryTestCase as-is."""

454 

455 

def setup_module(module):
    """Call lsst.utils.tests.init(); the ``module`` argument is unused.

    NOTE(review): presumably invoked by the test runner as its module-level
    setup hook before the tests in this file run -- confirm.
    """
    lsst.utils.tests.init()

458 

459 

# When run directly as a script, do the same initialisation as
# setup_module() and then hand over to the unittest runner.
if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()