Coverage for tests / test_butlerUtils.py: 14%

307 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-25 09:03 +0000

1# This file is part of summit_utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

import copy
import datetime
import os
import random
import unittest
import unittest.mock
from typing import Iterable

import lsst.daf.butler as dafButler
import lsst.utils.tests
from lsst.daf.butler import DatasetRef
from lsst.resources import ResourcePath
from lsst.summit.utils.butlerUtils import removeDataProduct  # noqa: F401
from lsst.summit.utils.butlerUtils import (
    RECENT_DAY,
    _assureDict,
    _dayobs_present,
    _expid_present,
    _get_dayobs_key,
    _get_expid_key,
    _get_seqnum_key,
    _seqnum_present,
    fillDataId,
    getDatasetRefForDataId,
    getDayObs,
    getDayObsSeqNumFromExposureId,
    getDaysWithData,
    getExpId,
    getExpIdFromDayObsSeqNum,
    getExpRecord,
    getExpRecordFromDataId,
    getLatissDefaultCollections,
    getLatissOnSkyDataIds,
    getMostRecentDataId,
    getMostRecentDayObs,
    getSeqNum,
    getSeqNumsForDayObs,
    getSite,
    makeDefaultLatissButler,
    sanitizeDayObs,
    sortRecordsByDayObsThenSeqNum,
    updateDataId,
    updateDataIdOrDataCord,
)

65 

66 

class ButlerUtilsTestCase(lsst.utils.tests.TestCase):
    """Tests for the helper functions in ``lsst.summit.utils.butlerUtils``.

    Every test here needs a working LATISS butler, so ``setUp`` skips the
    test when running in Jenkins or when the butler cannot be constructed.
    """

    def setUp(self):
        # This also functions as test_makeDefaultLatissButler(), but we may as
        # well keep the butler once it's made so it can be reused if needed,
        # given how hard it is to make it robustly.

        # butler stuff
        try:
            if getSite() == "jenkins":
                raise unittest.SkipTest("Skip running butler-driven tests in Jenkins.")
            self.butler = makeDefaultLatissButler()
        except FileNotFoundError:
            raise unittest.SkipTest("Skipping tests that require the LATISS butler repo.")
        self.assertIsInstance(self.butler, dafButler.Butler)

        # dict-like dataIds
        self.rawDataId = getMostRecentDataId(self.butler)
        self.fullId = fillDataId(self.butler, self.rawDataId)
        self.assertIn("exposure", self.fullId)
        self.assertIn("day_obs", self.fullId)
        self.assertIn("seq_num", self.fullId)
        self.expIdOnly = {"exposure": self.fullId["exposure"], "detector": 0}
        self.dayObsSeqNumIdOnly = {
            "day_obs": getDayObs(self.fullId),
            "seq_num": getSeqNum(self.fullId),
            "detector": 0,
        }

        # expRecords
        self.expRecordNoDetector = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(self.expRecordNoDetector, dafButler.DimensionRecord)
        self.assertFalse(hasattr(self.expRecordNoDetector, "detector"))
        self.assertNotIn("detector", self.expRecordNoDetector.dataId)
        # just a crosscheck on the above to make sure other things are correct
        self.assertTrue(hasattr(self.expRecordNoDetector, "instrument"))

        # data coordinates
        # popping here because butler.registry.expandDataId cannot have
        # day_obs or seq_num present right now
        rawDataIdNoDayObSeqNum = _assureDict(self.rawDataId)
        if dayObsKey := _get_dayobs_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(dayObsKey)
        if seqNumKey := _get_seqnum_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(seqNumKey)
        self.rawDataIdNoDayObSeqNum = rawDataIdNoDayObSeqNum
        self.dataCoordMinimal = self.butler.registry.expandDataId(self.rawDataIdNoDayObSeqNum, detector=0)
        self.assertIsInstance(self.dataCoordMinimal, dafButler.DataCoordinate)

    def test_getLatissDefaultCollections(self):
        defaultCollections = getLatissDefaultCollections()
        self.assertIsNotNone(defaultCollections)
        self.assertNotEqual(defaultCollections, [])
        self.assertGreaterEqual(len(defaultCollections), 1)

    def test_RECENT_DAY(self):
        todayInt = int(datetime.date.today().strftime("%Y%m%d"))
        self.assertLessEqual(RECENT_DAY, todayInt)  # in the past
        self.assertGreaterEqual(RECENT_DAY, 20200101)  # not too far in the past

        # check that the value of RECENT_DAY is before the end of the data.
        daysWithData = getDaysWithData(self.butler)
        self.assertLessEqual(RECENT_DAY, max(daysWithData))

        # no test here, but print a warning if it hasn't been updated recently
        recentDay_datetime = datetime.datetime.strptime(str(RECENT_DAY), "%Y%m%d")
        now = datetime.datetime.today()
        timeSinceUpdate = now - recentDay_datetime
        if timeSinceUpdate.days > 100:
            print(
                f"RECENT_DAY is now {timeSinceUpdate.days} days in the past. "
                "You might want to consider updating this to speed up butler queries."
            )

    def test_sanitizeDayObs(self):
        dayObs = "2020-01-02"
        self.assertEqual(sanitizeDayObs(dayObs), 20200102)
        dayObs = 20210201
        self.assertEqual(sanitizeDayObs(dayObs), dayObs)

        # Each bad input gets its own context manager: the first raise exits
        # the ``with`` block, so a second call in the same block never runs.
        with self.assertRaises(ValueError):
            sanitizeDayObs(1.234)
        with self.assertRaises(ValueError):
            sanitizeDayObs("Febuary 29th, 1970")

    def test_getMostRecentDayObs(self):
        # just a basic sanity check here as we can't know the value,
        # but at least check something is returned, and is plausible
        recentDay = getMostRecentDayObs(self.butler)
        self.assertIsInstance(recentDay, int)
        self.assertGreaterEqual(recentDay, RECENT_DAY)
        # some test data might be set a millennium in the future, i.e.
        # the year would be 2XXX+1000, so set to y4k just in case
        self.assertLess(recentDay, 40000000)

    def test_getSeqNumsForDayObs(self):
        emptyDay = 19990101
        seqnums = getSeqNumsForDayObs(self.butler, emptyDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertEqual(len(list(seqnums)), 0)

        recentDay = getMostRecentDayObs(self.butler)
        seqnums = getSeqNumsForDayObs(self.butler, recentDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertGreaterEqual(len(list(seqnums)), 1)

    def test_getMostRecentDataId(self):
        # we can't know the values, but it should always return something
        # and the dict should always contain certain keys
        dataId = getMostRecentDataId(self.butler)
        self.assertIsInstance(dataId, dict)
        self.assertIn("day_obs", dataId)
        self.assertIn("seq_num", dataId)
        self.assertTrue("exposure" in dataId or "exposure.id" in dataId)

    def test_getDatasetRefForDataId(self):
        dRef = getDatasetRefForDataId(self.butler, "raw", self.rawDataId)
        self.assertIsInstance(dRef, DatasetRef)

        dRef = getDatasetRefForDataId(self.butler, "raw", self.rawDataIdNoDayObSeqNum)
        self.assertIsInstance(dRef, DatasetRef)
        dRef = getDatasetRefForDataId(self.butler, "raw", self.dataCoordMinimal)
        self.assertIsInstance(dRef, DatasetRef)

    def test__dayobs_present(self):
        goods = [{"day_obs": 123}, {"exposure.day_obs": 234}, {"day_obs": 345, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertTrue(_dayobs_present(good))
        for bad in bads:
            self.assertFalse(_dayobs_present(bad))

    def test__seqnum_present(self):
        goods = [{"seq_num": 123}, {"exposure.seq_num": 234}, {"seq_num": 345, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertTrue(_seqnum_present(good))
        for bad in bads:
            self.assertFalse(_seqnum_present(bad))

    def test__expid_present(self):
        goods = [{"exposure": 123}, {"exposure.id": 234}, {"exposure.id": 345, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertTrue(_expid_present(good))
        for bad in bads:
            self.assertFalse(_expid_present(bad))

    def test_getDayObs(self):
        dayVal = 98765
        goods = [{"day_obs": dayVal}, {"exposure.day_obs": dayVal}, {"day_obs": dayVal, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertEqual(getDayObs(good), dayVal)
        for bad in bads:
            self.assertIsNone(getDayObs(bad))

    def test_getSeqNum(self):
        seqVal = 12345
        goods = [{"seq_num": seqVal}, {"exposure.seq_num": seqVal}, {"seq_num": seqVal, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertEqual(getSeqNum(good), seqVal)
        for bad in bads:
            self.assertIsNone(getSeqNum(bad))

    def test_getExpId(self):
        expIdVal = 12345
        goods = [{"exposure": expIdVal}, {"exposure.id": expIdVal}, {"exposure": expIdVal, "otherkey": -1}]
        bads = [{"different_key": 123}]
        for good in goods:
            self.assertEqual(getExpId(good), expIdVal)
        for bad in bads:
            self.assertIsNone(getExpId(bad))

    def test_datasetExists(self):
        self.assertTrue(self.butler.exists("raw", self.rawDataId))
        self.assertTrue(self.butler.exists("raw", self.expIdOnly))
        self.assertTrue(self.butler.exists("raw", self.dayObsSeqNumIdOnly))

    def test_sortRecordsByDayObsThenSeqNum(self):
        where = "exposure.day_obs=dayObs"
        bind = {"dayObs": RECENT_DAY}

        expRecords = list(self.butler.registry.queryDimensionRecords("exposure", where=where, bind=bind))
        self.assertGreaterEqual(len(expRecords), 1)  # just ensure we're not doing a no-op test
        random.shuffle(expRecords)  # they are often already in order, so make sure they're not
        sortedIds = sortRecordsByDayObsThenSeqNum(expRecords)
        for current, following in zip(sortedIds, sortedIds[1:]):
            self.assertLess(current.seq_num, following.seq_num)

        # Check that ambiguous sorts raise as expected. Only the sort call is
        # placed inside assertRaises so that a ValueError coming from the
        # registry query cannot make this pass by accident.
        expRecords = list(self.butler.registry.queryDimensionRecords("exposure", where=where, bind=bind))
        self.assertGreaterEqual(len(expRecords), 1)  # just ensure we're not doing a no-op test
        expRecords.append(expRecords[0])  # add a duplicate
        with self.assertRaises(ValueError):
            sortRecordsByDayObsThenSeqNum(expRecords)

    def test_getDaysWithData(self):
        days = getDaysWithData(self.butler)
        # must be non-empty, otherwise the element check below is meaningless
        self.assertGreaterEqual(len(days), 1)
        self.assertIsInstance(days[0], int)

    def test_getExpIdFromDayObsSeqNum(self):
        expId = getExpIdFromDayObsSeqNum(self.butler, self.dayObsSeqNumIdOnly)
        self.assertTrue(_expid_present(expId))

    def test_updateDataIdOrDataCord(self):
        updateVals = {"testKey": "testValue"}

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal]
        for originalId in ids:
            testId = updateDataIdOrDataCord(originalId, **updateVals)
            for k, v in updateVals.items():
                self.assertEqual(testId[k], v)

    def test_fillDataId(self):
        self.assertFalse(_dayobs_present(self.expIdOnly))
        self.assertFalse(_seqnum_present(self.expIdOnly))

        fullId = fillDataId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(fullId))
        self.assertTrue(_seqnum_present(fullId))

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal]
        for dataId in ids:
            fullId = fillDataId(self.butler, dataId)
            self.assertTrue(_dayobs_present(fullId))
            self.assertTrue(_seqnum_present(fullId))
            self.assertTrue(_expid_present(fullId))

    def test_getExpRecordFromDataId(self):
        record = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(record, dafButler.DimensionRecord)

    def test_getDayObsSeqNumFromExposureId(self):
        dayObsSeqNum = getDayObsSeqNumFromExposureId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(dayObsSeqNum))
        self.assertTrue(_seqnum_present(dayObsSeqNum))

    def test_removeDataProduct(self):
        # Can't think of an easy or safe test for this, so this is
        # deliberately a no-op placeholder.
        pass

    def test_getLatissOnSkyDataIds(self):
        # This is very slow, consider removing as it's the least important of
        # all the util functions. However, restricting it to only the most
        # recent day does help a lot, so probably OK like that, and should
        # speed up with middleware improvements in the future, and we should
        # ensure that they don't break this, so inclined to leave for now
        dayToUse = getDaysWithData(self.butler)[-1]
        # the most recent day with data might only be biases or flats so make
        # sure to override the default of skipping biases, darks & flats
        skipTypes = ()
        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])

        ids = getLatissOnSkyDataIds(
            self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse, full=True
        )
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])
        testId = ids[0]
        self.assertTrue(_dayobs_present(testId))
        self.assertTrue(_seqnum_present(testId))
        self.assertTrue(_expid_present(testId))

    def test__assureDict(self):
        for item in [
            self.rawDataId,
            self.fullId,
            self.expIdOnly,
            self.expRecordNoDetector,
            self.dataCoordMinimal,
            self.rawDataIdNoDayObSeqNum,
        ]:
            testId = _assureDict(item)
            self.assertIsInstance(testId, dict)

    def test__get_dayobs_key(self):
        dataId = {"a_random_key": 321, "exposure.day_obs": 20200312, "z_random_key": "abc"}
        self.assertEqual(_get_dayobs_key(dataId), "exposure.day_obs")
        dataId = {"day_obs": 20200312}
        self.assertEqual(_get_dayobs_key(dataId), "day_obs")
        dataId = {"missing": 20200312}
        self.assertIsNone(_get_dayobs_key(dataId))

    def test__get_seqnum_key(self):
        dataId = {"a_random_key": 321, "exposure.seq_num": 123, "z_random_key": "abc"}
        self.assertEqual(_get_seqnum_key(dataId), "exposure.seq_num")
        dataId = {"seq_num": 123}
        self.assertEqual(_get_seqnum_key(dataId), "seq_num")
        dataId = {"missing": 123}
        self.assertIsNone(_get_seqnum_key(dataId))

    def test__get_expid_key(self):
        dataId = {"a_random_key": 321, "exposure.id": 123, "z_random_key": "abc"}
        self.assertEqual(_get_expid_key(dataId), "exposure.id")
        dataId = {"a_random_key": 321, "exposure": 123, "z_random_key": "abc"}
        self.assertEqual(_get_expid_key(dataId), "exposure")
        dataId = {"missing": 123}
        self.assertIsNone(_get_expid_key(dataId))

    def test_updateDataId(self):
        # check with a dataCoordinate
        dataId = copy.copy(self.expRecordNoDetector.dataId)
        self.assertNotIn("detector", dataId)
        dataId = updateDataId(dataId, detector=123)
        self.assertIn("detector", dataId)
        self.assertEqual(dataId["detector"], 123)

        # check with a dict
        self.assertIsInstance(self.rawDataId, dict)
        dataId = copy.copy(self.rawDataId)
        dataId.pop("detector")
        self.assertNotIn("detector", dataId)
        dataId = updateDataId(dataId, detector=321)
        self.assertIn("detector", dataId)
        self.assertEqual(dataId["detector"], 321)

    def test_getExpRecord(self):
        expId = self.expIdOnly["exposure"]
        dayObs = self.dayObsSeqNumIdOnly["day_obs"]
        seqNum = self.dayObsSeqNumIdOnly["seq_num"]

        recordByExpId = getExpRecord(self.butler, "LATISS", expId=expId)
        self.assertIsInstance(recordByExpId, dafButler.DimensionRecord)

        recordByDayObsSeqNum = getExpRecord(self.butler, "LATISS", dayObs=dayObs, seqNum=seqNum)
        self.assertIsInstance(recordByDayObsSeqNum, dafButler.DimensionRecord)
        self.assertEqual(recordByExpId, recordByDayObsSeqNum)

        with self.assertRaises(ValueError):
            # because we need dayObs too, so immediate raise due to bad args
            _ = getExpRecord(self.butler, "LATISS", seqNum=seqNum)

        with self.assertRaises(RuntimeError):
            # (dayObs, seqNum) no longer matches the expId, so there are no
            # results, which is a RuntimeError
            _ = getExpRecord(self.butler, "LATISS", expId=expId, dayObs=dayObs, seqNum=seqNum + 1)

425 

426 

class ButlerInitTestCase(lsst.utils.tests.TestCase):
    """Separately test whether we can make a butler with the env var set
    and that the expected error type is raised and passed through when it is
    not, as this is relied upon to correctly skip tests when butler init is
    not possible.
    """

    def test_dafButlerRaiseTypes(self):
        # If DAF_BUTLER_REPOSITORY_INDEX is not set *at all* then
        # using an instrument label raises a FileNotFoundError
        with unittest.mock.patch.dict(os.environ):
            # pop with a default: the variable may or may not be present
            os.environ.pop("DAF_BUTLER_REPOSITORY_INDEX", None)
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler("LATISS")

        # If DAF_BUTLER_REPOSITORY_INDEX is present but is just an empty
        # string then using a label is also expected to raise a
        # FileNotFoundError
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ""}):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler("LATISS")

        # If DAF_BUTLER_REPOSITORY_INDEX _is_ set, we can't rely on any given
        # camera existing, but we can check that we get the expected error
        # when trying to init an instrument which definitely won't be defined.
        if os.getenv("DAF_BUTLER_REPOSITORY_INDEX"):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler("NotAValidCameraName")

    def test_makeDefaultLatissButlerRaiseTypes(self):
        """makeDefaultLatissButler unifies the mixed exception types from
        butler inits, so test all available possibilities here.
        """
        if getSite() == "jenkins":
            raise unittest.SkipTest("Skip running butler-driven tests in Jenkins.")

        # variable not set at all
        with unittest.mock.patch.dict(os.environ):
            os.environ.pop("DAF_BUTLER_REPOSITORY_INDEX", None)
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable set, but to an empty string
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ""}):
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable set, but pointing at a file which does not exist
        fakeFile = "/path/to/a/file/which/does/not_exist.yaml"
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": fakeFile}):
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

    def test_DAF_BUTLER_REPOSITORY_INDEX_value(self):
        # If DAF_BUTLER_REPOSITORY_INDEX is truthy then we expect it to point
        # to an actual file
        repoFile = os.getenv("DAF_BUTLER_REPOSITORY_INDEX")
        if repoFile:
            self.assertTrue(ResourcePath(repoFile).exists())

483 

484 

class TestMemory(lsst.utils.tests.MemoryTestCase):
    """Run the checks inherited from ``lsst.utils.tests.MemoryTestCase``."""

487 

488 

def setup_module(module):
    """Module-level setup hook: initialise the LSST test utilities.

    The ``module`` argument is accepted for the hook signature and unused.
    """
    lsst.utils.tests.init()

491 

492 

# Script entry point: initialise the LSST test utilities, then hand over to
# the unittest runner.
if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()