Coverage for tests/test_butlerUtils.py: 13%

307 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-30 04:56 -0700

1# This file is part of summit_utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import copy 

23import datetime 

24import os 

25import random 

26import unittest 

27from typing import Iterable 

28 

29import lsst.daf.butler as dafButler 

30import lsst.utils.tests 

31from lsst.daf.butler import DatasetRef, NamedKeyMapping 

32from lsst.resources import ResourcePath 

33from lsst.summit.utils.butlerUtils import removeDataProduct # noqa: F401 

34from lsst.summit.utils.butlerUtils import ( 

35 RECENT_DAY, 

36 _assureDict, 

37 _dayobs_present, 

38 _expid_present, 

39 _get_dayobs_key, 

40 _get_expid_key, 

41 _get_seqnum_key, 

42 _seqnum_present, 

43 datasetExists, 

44 fillDataId, 

45 getDatasetRefForDataId, 

46 getDayObs, 

47 getDayObsSeqNumFromExposureId, 

48 getDaysWithData, 

49 getExpId, 

50 getExpIdFromDayObsSeqNum, 

51 getExpRecord, 

52 getExpRecordFromDataId, 

53 getLatissDefaultCollections, 

54 getLatissOnSkyDataIds, 

55 getMostRecentDataId, 

56 getMostRecentDayObs, 

57 getSeqNum, 

58 getSeqNumsForDayObs, 

59 makeDefaultLatissButler, 

60 sanitizeDayObs, 

61 sortRecordsByDayObsThenSeqNum, 

62 updateDataId, 

63 updateDataIdOrDataCord, 

64) 

65 

66 

class ButlerUtilsTestCase(lsst.utils.tests.TestCase):
    """Exercise the helpers imported from ``lsst.summit.utils.butlerUtils``.

    ``setUp`` builds a LATISS butler plus the same observation expressed in
    several forms (plain dicts, an exposure record and data coordinates) so
    that each helper can be checked against every flavour of dataId. When
    ``makeDefaultLatissButler`` raises `FileNotFoundError` the tests are
    skipped.
    """

    def setUp(self):
        # This also functions as test_makeDefaultLatissButler(), but we may as
        # well keep the butler once it's made so it can be reused if needed,
        # given how hard it is to make it robustly.

        # butler stuff
        try:
            self.butler = makeDefaultLatissButler()
        except FileNotFoundError:
            raise unittest.SkipTest("Skipping tests that require the LATISS butler repo.")
        self.assertIsInstance(self.butler, dafButler.Butler)

        # dict-like dataIds
        self.rawDataId = getMostRecentDataId(self.butler)
        self.fullId = fillDataId(self.butler, self.rawDataId)
        for key in ("exposure", "day_obs", "seq_num"):
            self.assertIn(key, self.fullId)
        self.expIdOnly = {"exposure": self.fullId["exposure"], "detector": 0}
        self.dayObsSeqNumIdOnly = {
            "day_obs": getDayObs(self.fullId),
            "seq_num": getSeqNum(self.fullId),
            "detector": 0,
        }

        # expRecords
        self.expRecordNoDetector = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(self.expRecordNoDetector, dafButler.dimensions.DimensionRecord)
        self.assertFalse(hasattr(self.expRecordNoDetector, "detector"))
        self.assertNotIn("detector", self.expRecordNoDetector.dataId)
        # just a crosscheck on the above to make sure other things are correct
        self.assertTrue(hasattr(self.expRecordNoDetector, "instrument"))

        # data coordinates
        # popping here because butler.registry.expandDataId cannot have
        # day_obs or seq_num present right now
        rawDataIdNoDayObSeqNum = _assureDict(self.rawDataId)
        if dayObsKey := _get_dayobs_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(dayObsKey)
        if seqNumKey := _get_seqnum_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(seqNumKey)
        self.rawDataIdNoDayObSeqNum = rawDataIdNoDayObSeqNum
        self.dataCoordMinimal = self.butler.registry.expandDataId(self.rawDataIdNoDayObSeqNum, detector=0)
        self.dataCoordFullView = self.butler.registry.expandDataId(
            self.rawDataIdNoDayObSeqNum, detector=0
        ).full
        self.assertIsInstance(self.dataCoordMinimal, dafButler.dimensions.DataCoordinate)
        self.assertIsInstance(self.dataCoordFullView, NamedKeyMapping)

    def test_getLatissDefaultCollections(self):
        defaultCollections = getLatissDefaultCollections()
        self.assertIsNotNone(defaultCollections)
        self.assertNotEqual(defaultCollections, [])
        self.assertGreaterEqual(len(defaultCollections), 1)

    def test_RECENT_DAY(self):
        todayInt = int(datetime.date.today().strftime("%Y%m%d"))
        self.assertLessEqual(RECENT_DAY, todayInt)  # in the past
        self.assertGreaterEqual(RECENT_DAY, 20200101)  # not too far in the past

        # check that the value of RECENT_DAY is before the end of the data.
        daysWithData = getDaysWithData(self.butler)
        self.assertLessEqual(RECENT_DAY, max(daysWithData))

        # no test here, but print a warning if it hasn't been updated recently
        recentDay_datetime = datetime.datetime.strptime(str(RECENT_DAY), "%Y%m%d")
        now = datetime.datetime.today()
        timeSinceUpdate = now - recentDay_datetime
        if timeSinceUpdate.days > 100:
            print(
                f"RECENT_DAY is now {timeSinceUpdate.days} days in the past. "
                "You might want to consider updating this to speed up butler queries."
            )

    def test_sanitizeDayObs(self):
        self.assertEqual(sanitizeDayObs("2020-01-02"), 20200102)
        dayObs = 20210201
        self.assertEqual(sanitizeDayObs(dayObs), dayObs)

        # Each bad input gets its own assertRaises context. With both calls
        # in a single context the first raise ends the block, so the second
        # input would never actually be exercised.
        for badValue in (1.234, "Febuary 29th, 1970"):
            with self.subTest(badValue=badValue), self.assertRaises(ValueError):
                sanitizeDayObs(badValue)

    def test_getMostRecentDayObs(self):
        # just a basic sanity check here as we can't know the value,
        # but at least check something is returned, and is plausible
        recentDay = getMostRecentDayObs(self.butler)
        self.assertIsInstance(recentDay, int)
        self.assertGreaterEqual(recentDay, RECENT_DAY)
        # some test data might be set a millennium in the future, i.e.
        # the year would be 2XXX+1000, so set to y4k just in case
        self.assertLess(recentDay, 40000000)

    def test_getSeqNumsForDayObs(self):
        emptyDay = 19990101
        seqnums = getSeqNumsForDayObs(self.butler, emptyDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertEqual(len(list(seqnums)), 0)

        recentDay = getMostRecentDayObs(self.butler)
        seqnums = getSeqNumsForDayObs(self.butler, recentDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertGreaterEqual(len(list(seqnums)), 1)

    def test_getMostRecentDataId(self):
        # we can't know the values, but it should always return something
        # and the dict and int forms should always have certain keys and agree
        dataId = getMostRecentDataId(self.butler)
        self.assertIsInstance(dataId, dict)
        self.assertIn("day_obs", dataId)
        self.assertIn("seq_num", dataId)
        self.assertTrue("exposure" in dataId or "exposure.id" in dataId)

    def test_getDatasetRefForDataId(self):
        # every flavour of dataId built in setUp should resolve to a raw
        dataIds = (
            self.rawDataId,
            self.rawDataIdNoDayObSeqNum,
            self.dataCoordMinimal,
            self.dataCoordFullView,
        )
        for dataId in dataIds:
            dRef = getDatasetRefForDataId(self.butler, "raw", dataId)
            self.assertIsInstance(dRef, DatasetRef)

    def test__dayobs_present(self):
        goods = [{"day_obs": 123}, {"exposure.day_obs": 234}, {"day_obs": 345, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertTrue(_dayobs_present(good))
        for bad in bads:
            self.assertFalse(_dayobs_present(bad))

    def test__seqnum_present(self):
        goods = [{"seq_num": 123}, {"exposure.seq_num": 234}, {"seq_num": 345, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertTrue(_seqnum_present(good))
        for bad in bads:
            self.assertFalse(_seqnum_present(bad))

    def test__expid_present(self):
        goods = [{"exposure": 123}, {"exposure.id": 234}, {"exposure.id": 345, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertTrue(_expid_present(good))
        for bad in bads:
            self.assertFalse(_expid_present(bad))

    def test_getDayObs(self):
        dayVal = 98765
        goods = [{"day_obs": dayVal}, {"exposure.day_obs": dayVal}, {"day_obs": dayVal, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertEqual(getDayObs(good), dayVal)
        for bad in bads:
            self.assertIsNone(getDayObs(bad))

    def test_getSeqNum(self):
        seqVal = 12345
        goods = [{"seq_num": seqVal}, {"exposure.seq_num": seqVal}, {"seq_num": seqVal, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertEqual(getSeqNum(good), seqVal)
        for bad in bads:
            self.assertIsNone(getSeqNum(bad))

    def test_getExpId(self):
        expIdVal = 12345
        goods = [{"exposure": expIdVal}, {"exposure.id": expIdVal}, {"exposure": expIdVal, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertEqual(getExpId(good), expIdVal)
        for bad in bads:
            self.assertIsNone(getExpId(bad))

    def test_datasetExists(self):
        self.assertTrue(datasetExists(self.butler, "raw", self.rawDataId))
        self.assertTrue(datasetExists(self.butler, "raw", self.expIdOnly))
        self.assertTrue(datasetExists(self.butler, "raw", self.dayObsSeqNumIdOnly))

    def test_sortRecordsByDayObsThenSeqNum(self):
        where = "exposure.day_obs=dayObs"
        expRecords = self.butler.registry.queryDimensionRecords(
            "exposure", where=where, bind={"dayObs": RECENT_DAY}
        )
        expRecords = list(expRecords)
        self.assertGreaterEqual(len(expRecords), 1)  # just ensure we're not doing a no-op test
        random.shuffle(expRecords)  # they are often already in order, so make sure they're not
        sortedIds = sortRecordsByDayObsThenSeqNum(expRecords)
        # the query is restricted to a single day_obs, so within the result
        # the seq_num alone has to be strictly increasing
        for earlier, later in zip(sortedIds, sortedIds[1:]):
            self.assertLess(earlier.seq_num, later.seq_num)

        # Check that ambiguous sorts raise as expected. The records are
        # prepared outside the assertRaises context so that only the sort
        # itself is allowed to satisfy the expected ValueError.
        expRecords = self.butler.registry.queryDimensionRecords(
            "exposure", where=where, bind={"dayObs": RECENT_DAY}
        )
        expRecords = list(expRecords)
        self.assertGreaterEqual(len(expRecords), 1)  # just ensure we're not doing a no-op test
        expRecords.append(expRecords[0])  # add a duplicate
        with self.assertRaises(ValueError):
            sortRecordsByDayObsThenSeqNum(expRecords)

    def test_getDaysWithData(self):
        days = getDaysWithData(self.butler)
        # days[0] is indexed below, so there must be at least one entry; a
        # ">= 0" length check could never fail and so tested nothing
        self.assertGreaterEqual(len(days), 1)
        self.assertIsInstance(days[0], int)

    def test_getExpIdFromDayObsSeqNum(self):
        expId = getExpIdFromDayObsSeqNum(self.butler, self.dayObsSeqNumIdOnly)
        self.assertTrue(_expid_present(expId))

    def test_updateDataIdOrDataCord(self):
        updateVals = {"testKey": "testValue"}

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for originalId in ids:
            testId = updateDataIdOrDataCord(originalId, **updateVals)
            for k, v in updateVals.items():
                self.assertEqual(testId[k], v)

    def test_fillDataId(self):
        self.assertFalse(_dayobs_present(self.expIdOnly))
        self.assertFalse(_seqnum_present(self.expIdOnly))

        fullId = fillDataId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(fullId))
        self.assertTrue(_seqnum_present(fullId))

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for dataId in ids:
            fullId = fillDataId(self.butler, dataId)
            self.assertTrue(_dayobs_present(fullId))
            self.assertTrue(_seqnum_present(fullId))
            self.assertTrue(_expid_present(fullId))

    def test_getExpRecordFromDataId(self):
        record = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(record, dafButler.dimensions.DimensionRecord)

    def test_getDayObsSeqNumFromExposureId(self):
        dayObsSeqNum = getDayObsSeqNumFromExposureId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(dayObsSeqNum))
        self.assertTrue(_seqnum_present(dayObsSeqNum))

    def test_removeDataProduct(self):
        # Can't think of an easy or safe test for this
        pass

    def test_getLatissOnSkyDataIds(self):
        # This is very slow, consider removing as it's the least important of
        # all the util functions. However, restricting it to only the most
        # recent day does help a lot, so probably OK like that, and should
        # speed up with middleware improvements in the future, and we should
        # ensure that they don't break this, so inclined to leave for now
        dayToUse = getDaysWithData(self.butler)[-1]
        # the most recent day with data might only be biases or flats so make
        # sure to override the default of skipping biases, darks & flats
        skipTypes = ()
        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])

        ids = getLatissOnSkyDataIds(
            self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse, full=True
        )
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])
        testId = ids[0]
        self.assertTrue(_dayobs_present(testId))
        self.assertTrue(_seqnum_present(testId))
        self.assertTrue(_expid_present(testId))

    def test__assureDict(self):
        for item in [
            self.rawDataId,
            self.fullId,
            self.expIdOnly,
            self.expRecordNoDetector,
            self.dataCoordFullView,
            self.dataCoordMinimal,
            self.rawDataIdNoDayObSeqNum,
        ]:
            testId = _assureDict(item)
            self.assertIsInstance(testId, dict)

    def test__get_dayobs_key(self):
        dataId = {"a_random_key": 321, "exposure.day_obs": 20200312, "z_random_key": "abc"}
        self.assertEqual(_get_dayobs_key(dataId), "exposure.day_obs")
        dataId = {"day_obs": 20200312}
        self.assertEqual(_get_dayobs_key(dataId), "day_obs")
        dataId = {"missing": 20200312}
        self.assertIsNone(_get_dayobs_key(dataId))

    def test__get_seqnum_key(self):
        dataId = {"a_random_key": 321, "exposure.seq_num": 123, "z_random_key": "abc"}
        self.assertEqual(_get_seqnum_key(dataId), "exposure.seq_num")
        dataId = {"seq_num": 123}
        self.assertEqual(_get_seqnum_key(dataId), "seq_num")
        dataId = {"missing": 123}
        self.assertIsNone(_get_seqnum_key(dataId))

    def test__get_expid_key(self):
        dataId = {"a_random_key": 321, "exposure.id": 123, "z_random_key": "abc"}
        self.assertEqual(_get_expid_key(dataId), "exposure.id")
        dataId = {"a_random_key": 321, "exposure": 123, "z_random_key": "abc"}
        self.assertEqual(_get_expid_key(dataId), "exposure")
        dataId = {"missing": 123}
        self.assertIsNone(_get_expid_key(dataId))

    def test_updateDataId(self):
        # check with a dataCoordinate
        dataId = copy.copy(self.expRecordNoDetector.dataId)
        self.assertNotIn("detector", dataId)
        dataId = updateDataId(dataId, detector=123)
        self.assertIn("detector", dataId)
        self.assertEqual(dataId["detector"], 123)

        # check with a dict
        self.assertIsInstance(self.rawDataId, dict)
        dataId = copy.copy(self.rawDataId)
        dataId.pop("detector")
        self.assertNotIn("detector", dataId)
        dataId = updateDataId(dataId, detector=321)
        self.assertIn("detector", dataId)
        self.assertEqual(dataId["detector"], 321)

    def test_getExpRecord(self):
        expId = self.expIdOnly["exposure"]
        dayObs = self.dayObsSeqNumIdOnly["day_obs"]
        seqNum = self.dayObsSeqNumIdOnly["seq_num"]

        recordByExpId = getExpRecord(self.butler, "LATISS", expId=expId)
        self.assertIsInstance(recordByExpId, dafButler.dimensions.DimensionRecord)

        recordByDayObsSeqNum = getExpRecord(self.butler, "LATISS", dayObs=dayObs, seqNum=seqNum)
        self.assertIsInstance(recordByDayObsSeqNum, dafButler.dimensions.DimensionRecord)
        self.assertEqual(recordByExpId, recordByDayObsSeqNum)

        with self.assertRaises(ValueError):
            # because we need dayObs too, so immediate raise due to bad args
            _ = getExpRecord(self.butler, "LATISS", seqNum=seqNum)

        with self.assertRaises(RuntimeError):
            # (dayObs, seqNum) no longer matches the expId, so there are no
            # results, which is a RuntimeError
            _ = getExpRecord(self.butler, "LATISS", expId=expId, dayObs=dayObs, seqNum=seqNum + 1)

430 

431 

class ButlerInitTestCase(lsst.utils.tests.TestCase):
    """Separately test whether we can make a butler with the env var set
    and that the expected error type is raised and passed through when it is
    not, as this is relied upon to correctly skip tests when butler init is
    not possible.
    """

    def test_dafButlerRaiseTypes(self):
        # Import the submodule explicitly: a bare ``import unittest`` does not
        # by itself make ``unittest.mock`` available.
        from unittest import mock

        # If DAF_BUTLER_REPOSITORY_INDEX is not set *at all* then
        # using an instrument label raises a FileNotFoundError
        with mock.patch.dict(os.environ):
            # patch.dict puts the environment back when the block exits, so
            # removing the variable here cannot leak into other tests
            os.environ.pop("DAF_BUTLER_REPOSITORY_INDEX", None)
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler("LATISS")

        # If DAF_BUTLER_REPOSITORY_INDEX is present but is just an empty
        # string then using a label is likewise expected to raise a
        # FileNotFoundError
        with mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ""}):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler("LATISS")

        # If DAF_BUTLER_REPOSITORY_INDEX _is_ set, we can't rely on any given
        # camera existing, but we can check that we get the expected error
        # when trying to init an instrument which definitely won't be defined.
        if os.getenv("DAF_BUTLER_REPOSITORY_INDEX"):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler("NotAValidCameraName")

    def test_makeDefaultLatissButlerRaiseTypes(self):
        """makeDefaultLatissButler unifies the mixed exception types from
        butler inits, so test all available possibilities here.
        """
        # Import the submodule explicitly: a bare ``import unittest`` does not
        # by itself make ``unittest.mock`` available.
        from unittest import mock

        # variable not set at all
        with mock.patch.dict(os.environ):
            os.environ.pop("DAF_BUTLER_REPOSITORY_INDEX", None)
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable set, but empty
        with mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ""}):
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable set to a path which does not exist
        fakeFile = "/path/to/a/file/which/does/not_exist.yaml"
        with mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": fakeFile}):
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

    def test_DAF_BUTLER_REPOSITORY_INDEX_value(self):
        # If DAF_BUTLER_REPOSITORY_INDEX is truthy then we expect it to point
        # to an actual file
        repoFile = os.getenv("DAF_BUTLER_REPOSITORY_INDEX")
        if repoFile:
            self.assertTrue(ResourcePath(repoFile).exists())

486 

487 

class TestMemory(lsst.utils.tests.MemoryTestCase):
    """Adds nothing of its own; everything is inherited from
    ``lsst.utils.tests.MemoryTestCase``.
    """

490 

491 

def setup_module(module):
    """Module-level setup: call ``lsst.utils.tests.init()``.

    Presumably invoked by the test runner before the tests in this module
    run (confirm against the runner in use). ``module`` is accepted but not
    used.
    """
    lsst.utils.tests.init()

494 

495 

if __name__ == "__main__":
    # Executed as a script: perform the same init call that setup_module
    # makes, then hand over to unittest's command-line runner.
    lsst.utils.tests.init()
    unittest.main()