Coverage for tests/test_butlerUtils.py: 13%

307 statements  

« prev     ^ index     » next       coverage.py v7.4.2, created at 2024-02-23 15:47 +0000

1# This file is part of summit_utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import os 

23import unittest 

24from typing import Iterable 

25import datetime 

26import random 

27import copy 

28 

29import lsst.utils.tests 

30from lsst.summit.utils.butlerUtils import (makeDefaultLatissButler, 

31 updateDataId, 

32 sanitizeDayObs, 

33 getMostRecentDayObs, 

34 getSeqNumsForDayObs, 

35 getMostRecentDataId, 

36 getDatasetRefForDataId, 

37 _dayobs_present, 

38 _seqnum_present, 

39 _expid_present, 

40 _get_dayobs_key, 

41 _get_seqnum_key, 

42 _get_expid_key, 

43 getDayObs, 

44 getSeqNum, 

45 getExpId, 

46 datasetExists, 

47 sortRecordsByDayObsThenSeqNum, 

48 getDaysWithData, 

49 getExpIdFromDayObsSeqNum, 

50 updateDataIdOrDataCord, 

51 fillDataId, 

52 getExpRecordFromDataId, 

53 getDayObsSeqNumFromExposureId, 

54 getLatissOnSkyDataIds, 

55 _assureDict, 

56 getLatissDefaultCollections, 

57 RECENT_DAY, 

58 getExpRecord, 

59 ) 

60from lsst.summit.utils.butlerUtils import removeDataProduct # noqa: F401 

61import lsst.daf.butler as dafButler 

62from lsst.daf.butler import DatasetRef, NamedKeyMapping 

63from lsst.resources import ResourcePath 

64 

65 

class ButlerUtilsTestCase(lsst.utils.tests.TestCase):
    """Tests for the helper functions in ``lsst.summit.utils.butlerUtils``.

    Every test uses the default LATISS butler, which is created in ``setUp``.
    If creating it raises ``FileNotFoundError`` the test is skipped.
    """

    def setUp(self):
        """Create the butler and the dataId flavours shared by the tests."""
        # this also functions as test_makeDefaultLatissButler(), but we may as
        # well catch the butler once it's made so it can be reused if needed,
        # given how hard it is to make it robustly

        # butler stuff
        try:
            self.butler = makeDefaultLatissButler()
        except FileNotFoundError:
            raise unittest.SkipTest("Skipping tests that require the LATISS butler repo.")
        self.assertIsInstance(self.butler, dafButler.Butler)

        # dict-like dataIds
        self.rawDataId = getMostRecentDataId(self.butler)
        self.fullId = fillDataId(self.butler, self.rawDataId)
        self.assertIn('exposure', self.fullId)
        self.assertIn('day_obs', self.fullId)
        self.assertIn('seq_num', self.fullId)
        self.expIdOnly = {'exposure': self.fullId['exposure'], 'detector': 0}
        self.dayObsSeqNumIdOnly = {'day_obs': getDayObs(self.fullId), 'seq_num': getSeqNum(self.fullId),
                                   'detector': 0}

        # expRecords
        self.expRecordNoDetector = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(self.expRecordNoDetector, dafButler.dimensions.DimensionRecord)
        self.assertFalse(hasattr(self.expRecordNoDetector, 'detector'))
        self.assertNotIn('detector', self.expRecordNoDetector.dataId)
        # just a crosscheck on the above to make sure other things are correct
        self.assertTrue(hasattr(self.expRecordNoDetector, 'instrument'))

        # data coordinates
        # popping here because butler.registry.expandDataId cannot have
        # day_obs or seq_num present right now
        rawDataIdNoDayObSeqNum = _assureDict(self.rawDataId)
        if dayObsKey := _get_dayobs_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(dayObsKey)
        if seqNumKey := _get_seqnum_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(seqNumKey)
        self.rawDataIdNoDayObSeqNum = rawDataIdNoDayObSeqNum
        self.dataCoordMinimal = self.butler.registry.expandDataId(self.rawDataIdNoDayObSeqNum, detector=0)
        self.dataCoordFullView = self.butler.registry.expandDataId(self.rawDataIdNoDayObSeqNum,
                                                                   detector=0).full
        self.assertIsInstance(self.dataCoordMinimal, dafButler.dimensions.DataCoordinate)
        self.assertIsInstance(self.dataCoordFullView, NamedKeyMapping)

    def test_getLatissDefaultCollections(self):
        defaultCollections = getLatissDefaultCollections()
        self.assertIsNotNone(defaultCollections)
        self.assertNotEqual(defaultCollections, [])
        self.assertGreaterEqual(len(defaultCollections), 1)

    def test_RECENT_DAY(self):
        todayInt = int(datetime.date.today().strftime("%Y%m%d"))
        self.assertLessEqual(RECENT_DAY, todayInt)  # in the past
        self.assertGreaterEqual(RECENT_DAY, 20200101)  # not too far in the past

        # check that the value of RECENT_DAY is before the end of the data.
        daysWithData = getDaysWithData(self.butler)
        self.assertLessEqual(RECENT_DAY, max(daysWithData))

        # no test here, but print a warning if it hasn't been updated recently
        recentDay_datetime = datetime.datetime.strptime(str(RECENT_DAY), "%Y%m%d")
        now = datetime.datetime.today()
        timeSinceUpdate = now - recentDay_datetime
        if timeSinceUpdate.days > 100:
            print(f"RECENT_DAY is now {timeSinceUpdate.days} days in the past. "
                  "You might want to consider updating this to speed up butler queries.")

    def test_sanitizeDayObs(self):
        dayObs = '2020-01-02'
        self.assertEqual(sanitizeDayObs(dayObs), 20200102)
        dayObs = 20210201
        self.assertEqual(sanitizeDayObs(dayObs), dayObs)

        # Each bad value needs its own assertRaises context: the context is
        # left as soon as the first call raises, so a second call placed in
        # the same block would never be executed.
        for badDayObs in (1.234, 'Febuary 29th, 1970'):
            with self.subTest(dayObs=badDayObs):
                with self.assertRaises(ValueError):
                    sanitizeDayObs(badDayObs)

    def test_getMostRecentDayObs(self):
        # just a basic sanity check here as we can't know the value,
        # but at least check something is returned, and is plausible
        recentDay = getMostRecentDayObs(self.butler)
        self.assertIsInstance(recentDay, int)
        self.assertGreaterEqual(recentDay, RECENT_DAY)
        # some test data might be set a millennium in the future, i.e.
        # the year would be 2XXX+1000, so set to y4k just in case
        self.assertLess(recentDay, 40000000)

    def test_getSeqNumsForDayObs(self):
        emptyDay = 19990101
        seqnums = getSeqNumsForDayObs(self.butler, emptyDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertEqual(len(list(seqnums)), 0)

        recentDay = getMostRecentDayObs(self.butler)
        seqnums = getSeqNumsForDayObs(self.butler, recentDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertGreaterEqual(len(list(seqnums)), 1)

    def test_getMostRecentDataId(self):
        # we can't know the values, but it should always return something
        # and the dict and int forms should always have certain keys and agree
        dataId = getMostRecentDataId(self.butler)
        self.assertIsInstance(dataId, dict)
        self.assertIn('day_obs', dataId)
        self.assertIn('seq_num', dataId)
        self.assertTrue('exposure' in dataId or 'exposure.id' in dataId)

    def test_getDatasetRefForDataId(self):
        dRef = getDatasetRefForDataId(self.butler, 'raw', self.rawDataId)
        self.assertIsInstance(dRef, DatasetRef)

        dRef = getDatasetRefForDataId(self.butler, 'raw', self.rawDataIdNoDayObSeqNum)
        self.assertIsInstance(dRef, DatasetRef)
        dRef = getDatasetRefForDataId(self.butler, 'raw', self.dataCoordMinimal)
        self.assertIsInstance(dRef, DatasetRef)
        dRef = getDatasetRefForDataId(self.butler, 'raw', self.dataCoordFullView)
        self.assertIsInstance(dRef, DatasetRef)

    def test__dayobs_present(self):
        goods = [{'day_obs': 123}, {'exposure.day_obs': 234}, {'day_obs': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_dayobs_present(good))
        for bad in bads:
            self.assertFalse(_dayobs_present(bad))

    def test__seqnum_present(self):
        goods = [{'seq_num': 123}, {'exposure.seq_num': 234}, {'seq_num': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_seqnum_present(good))
        for bad in bads:
            self.assertFalse(_seqnum_present(bad))

    def test__expid_present(self):
        goods = [{'exposure': 123}, {'exposure.id': 234}, {'exposure.id': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_expid_present(good))
        for bad in bads:
            self.assertFalse(_expid_present(bad))

    def test_getDayObs(self):
        dayVal = 98765
        goods = [{'day_obs': dayVal}, {'exposure.day_obs': dayVal}, {'day_obs': dayVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getDayObs(good), dayVal)
        for bad in bads:
            self.assertIsNone(getDayObs(bad))

    def test_getSeqNum(self):
        seqVal = 12345
        goods = [{'seq_num': seqVal}, {'exposure.seq_num': seqVal}, {'seq_num': seqVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getSeqNum(good), seqVal)
        for bad in bads:
            self.assertIsNone(getSeqNum(bad))

    def test_getExpId(self):
        expIdVal = 12345
        goods = [{'exposure': expIdVal}, {'exposure.id': expIdVal}, {'exposure': expIdVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getExpId(good), expIdVal)
        for bad in bads:
            self.assertIsNone(getExpId(bad))

    def test_datasetExists(self):
        self.assertTrue(datasetExists(self.butler, 'raw', self.rawDataId))
        self.assertTrue(datasetExists(self.butler, 'raw', self.expIdOnly))
        self.assertTrue(datasetExists(self.butler, 'raw', self.dayObsSeqNumIdOnly))

    def test_sortRecordsByDayObsThenSeqNum(self):
        where = "exposure.day_obs=day_obs"
        expRecords = self.butler.registry.queryDimensionRecords("exposure", where=where,
                                                                bind={'day_obs': RECENT_DAY})
        expRecords = list(expRecords)
        self.assertGreaterEqual(len(expRecords), 1)  # just ensure we're not doing a no-op test
        random.shuffle(expRecords)  # they are often already in order, so make sure they're not
        sortedIds = sortRecordsByDayObsThenSeqNum(expRecords)
        # all records share one day_obs, so seq_num must be strictly increasing
        for earlier, later in zip(sortedIds, sortedIds[1:]):
            self.assertLess(earlier.seq_num, later.seq_num)

        # Check that ambiguous sorts raise as expected. The input is built
        # outside the assertRaises block so that only the sort itself can
        # satisfy the expectation.
        recordsWithDuplicate = expRecords + [expRecords[0]]  # add a duplicate
        with self.assertRaises(ValueError):
            sortRecordsByDayObsThenSeqNum(recordsWithDuplicate)

    def test_getDaysWithData(self):
        days = getDaysWithData(self.butler)
        # must be non-empty for the element check below to be meaningful
        self.assertGreaterEqual(len(days), 1)
        self.assertIsInstance(days[0], int)

    def test_getExpIdFromDayObsSeqNum(self):
        expId = getExpIdFromDayObsSeqNum(self.butler, self.dayObsSeqNumIdOnly)
        self.assertTrue(_expid_present(expId))

    def test_updateDataIdOrDataCord(self):
        updateVals = {'testKey': 'testValue'}

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for originalId in ids:
            testId = updateDataIdOrDataCord(originalId, **updateVals)
            for k, v in updateVals.items():
                self.assertEqual(testId[k], v)

    def test_fillDataId(self):
        self.assertFalse(_dayobs_present(self.expIdOnly))
        self.assertFalse(_seqnum_present(self.expIdOnly))

        fullId = fillDataId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(fullId))
        self.assertTrue(_seqnum_present(fullId))

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for dataId in ids:
            fullId = fillDataId(self.butler, dataId)
            self.assertTrue(_dayobs_present(fullId))
            self.assertTrue(_seqnum_present(fullId))
            self.assertTrue(_expid_present(fullId))

    def test_getExpRecordFromDataId(self):
        record = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(record, dafButler.dimensions.DimensionRecord)

    def test_getDayObsSeqNumFromExposureId(self):
        dayObsSeqNum = getDayObsSeqNumFromExposureId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(dayObsSeqNum))
        self.assertTrue(_seqnum_present(dayObsSeqNum))

    def test_removeDataProduct(self):
        # Can't think of an easy or safe test for this
        pass

    def test_getLatissOnSkyDataIds(self):
        # This is very slow, consider removing as it's the least important of
        # all the util functions. However, restricting it to only the most
        # recent day does help a lot, so probably OK like that, and should
        # speed up with middleware improvements in the future, and we should
        # ensure that they don't break this, so inclined to leave for now
        dayToUse = getDaysWithData(self.butler)[-1]
        # the most recent day with data might only be biases or flats so make
        # sure to override the default of skipping biases, darks & flats
        skipTypes = ()
        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])

        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse,
                                    full=True)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])
        testId = ids[0]
        self.assertTrue(_dayobs_present(testId))
        self.assertTrue(_seqnum_present(testId))
        self.assertTrue(_expid_present(testId))

    def test__assureDict(self):
        for item in [self.rawDataId, self.fullId, self.expIdOnly,
                     self.expRecordNoDetector, self.dataCoordFullView,
                     self.dataCoordMinimal, self.rawDataIdNoDayObSeqNum]:
            testId = _assureDict(item)
            self.assertIsInstance(testId, dict)

    def test__get_dayobs_key(self):
        dataId = {'a_random_key': 321, 'exposure.day_obs': 20200312, 'z_random_key': 'abc'}
        self.assertEqual(_get_dayobs_key(dataId), 'exposure.day_obs')
        dataId = {'day_obs': 20200312}
        self.assertEqual(_get_dayobs_key(dataId), 'day_obs')
        dataId = {'missing': 20200312}
        self.assertIsNone(_get_dayobs_key(dataId))

    def test__get_seqnum_key(self):
        dataId = {'a_random_key': 321, 'exposure.seq_num': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_seqnum_key(dataId), 'exposure.seq_num')
        dataId = {'seq_num': 123}
        self.assertEqual(_get_seqnum_key(dataId), 'seq_num')
        dataId = {'missing': 123}
        self.assertIsNone(_get_seqnum_key(dataId))

    def test__get_expid_key(self):
        dataId = {'a_random_key': 321, 'exposure.id': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_expid_key(dataId), 'exposure.id')
        dataId = {'a_random_key': 321, 'exposure': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_expid_key(dataId), 'exposure')
        dataId = {'missing': 123}
        self.assertIsNone(_get_expid_key(dataId))

    def test_updateDataId(self):
        # check with a dataCoordinate
        dataId = copy.copy(self.expRecordNoDetector.dataId)
        self.assertNotIn('detector', dataId)
        dataId = updateDataId(dataId, detector=123)
        self.assertIn('detector', dataId)
        self.assertEqual(dataId['detector'], 123)

        # check with a dict
        self.assertIsInstance(self.rawDataId, dict)
        dataId = copy.copy(self.rawDataId)
        dataId.pop('detector')
        self.assertNotIn('detector', dataId)
        dataId = updateDataId(dataId, detector=321)
        self.assertIn('detector', dataId)
        self.assertEqual(dataId['detector'], 321)

    def test_getExpRecord(self):
        expId = self.expIdOnly['exposure']
        dayObs = self.dayObsSeqNumIdOnly['day_obs']
        seqNum = self.dayObsSeqNumIdOnly['seq_num']

        recordByExpId = getExpRecord(self.butler, 'LATISS', expId=expId)
        self.assertIsInstance(recordByExpId, dafButler.dimensions.DimensionRecord)

        recordByDayObsSeqNum = getExpRecord(self.butler, 'LATISS', dayObs=dayObs, seqNum=seqNum)
        self.assertIsInstance(recordByDayObsSeqNum, dafButler.dimensions.DimensionRecord)
        self.assertEqual(recordByExpId, recordByDayObsSeqNum)

        with self.assertRaises(ValueError):
            # because we need dayObs too, so immediate raise due to bad args
            _ = getExpRecord(self.butler, 'LATISS', seqNum=seqNum)

        with self.assertRaises(RuntimeError):
            # (dayObs, seqNum) no longer matches the expId, so there are no
            # results, which is a RuntimeError
            _ = getExpRecord(self.butler, 'LATISS', expId=expId, dayObs=dayObs, seqNum=seqNum+1)

414 

415 

class ButlerInitTestCase(lsst.utils.tests.TestCase):
    """Separately test whether we can make a butler with the env var set
    and that the expected error type is raised and passed through when it is
    not, as this is relied upon to correctly skip tests when butler init is
    not possible.
    """

    def test_dafButlerRaiseTypes(self):
        # Imported explicitly: ``import unittest`` alone does not guarantee
        # that the ``unittest.mock`` submodule has been loaded.
        from unittest import mock

        # If DAF_BUTLER_REPOSITORY_INDEX is not set *at all* then
        # using an instrument label raises a FileNotFoundError
        with mock.patch.dict(os.environ):
            # pop with a default so this is a no-op when the var is absent
            os.environ.pop('DAF_BUTLER_REPOSITORY_INDEX', None)
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('LATISS')

        # If DAF_BUTLER_REPOSITORY_INDEX is present but is just an empty
        # string then using a label also raises a FileNotFoundError
        with mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ''}):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('LATISS')

        # If DAF_BUTLER_REPOSITORY_INDEX _is_ set, we can't rely on any given
        # camera existing, but we can check that we get the expected error
        # when trying to init an instrument which definitely won't be defined.
        if os.getenv('DAF_BUTLER_REPOSITORY_INDEX'):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('NotAValidCameraName')

    def test_makeDefaultLatissButlerRaiseTypes(self):
        """makeDefaultLatissButler unifies the mixed exception types from
        butler inits, so test all available possibilities here.
        """
        # Imported explicitly: ``import unittest`` alone does not guarantee
        # that the ``unittest.mock`` submodule has been loaded.
        from unittest import mock

        # variable not set at all
        with mock.patch.dict(os.environ):
            # pop with a default so this is a no-op when the var is absent
            os.environ.pop('DAF_BUTLER_REPOSITORY_INDEX', None)
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable set to an empty string
        with mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ''}):
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable set to a path which does not exist
        fakeFile = '/path/to/a/file/which/does/not_exist.yaml'
        with mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": fakeFile}):
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

    def test_DAF_BUTLER_REPOSITORY_INDEX_value(self):
        # If DAF_BUTLER_REPOSITORY_INDEX is truthy then we expect it to point
        # to an actual file
        repoFile = os.getenv('DAF_BUTLER_REPOSITORY_INDEX')
        if repoFile:
            self.assertTrue(ResourcePath(repoFile).exists())

470 

471 

class TestMemory(lsst.utils.tests.MemoryTestCase):
    """Memory test case; all behaviour is inherited unchanged from
    ``lsst.utils.tests.MemoryTestCase``.
    """

474 

475 

def setup_module(module):
    """Call ``lsst.utils.tests.init()`` before the tests in this module run.

    Parameters
    ----------
    module
        Unused here; presumably the module object handed over by the test
        runner's module-level setup hook (confirm against the runner in use).
    """
    lsst.utils.tests.init()

478 

479 

# Script entry point: initialise the LSST test utilities, then hand over to
# the standard unittest runner.
if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()