Coverage for tests/test_butlerUtils.py: 13%

307 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-29 10:24 +0000

1# This file is part of summit_utils. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import os 

23import unittest 

24from typing import Iterable 

25import datetime 

26import random 

27import copy 

28 

29import lsst.utils.tests 

30from lsst.summit.utils.butlerUtils import (makeDefaultLatissButler, 

31 updateDataId, 

32 sanitizeDayObs, 

33 getMostRecentDayObs, 

34 getSeqNumsForDayObs, 

35 getMostRecentDataId, 

36 getDatasetRefForDataId, 

37 _dayobs_present, 

38 _seqnum_present, 

39 _expid_present, 

40 _get_dayobs_key, 

41 _get_seqnum_key, 

42 _get_expid_key, 

43 getDayObs, 

44 getSeqNum, 

45 getExpId, 

46 datasetExists, 

47 sortRecordsByDayObsThenSeqNum, 

48 getDaysWithData, 

49 getExpIdFromDayObsSeqNum, 

50 updateDataIdOrDataCord, 

51 fillDataId, 

52 getExpRecordFromDataId, 

53 getDayObsSeqNumFromExposureId, 

54 getLatissOnSkyDataIds, 

55 _assureDict, 

56 getLatissDefaultCollections, 

57 RECENT_DAY, 

58 getExpRecord, 

59 ) 

60from lsst.summit.utils.butlerUtils import removeDataProduct # noqa: F401 

61import lsst.daf.butler as dafButler 

62from lsst.resources import ResourcePath 

63 

64 

class ButlerUtilsTestCase(lsst.utils.tests.TestCase):
    """Tests for the helper functions in lsst.summit.utils.butlerUtils.

    Every test here needs a LATISS butler. When one cannot be constructed
    setUp() raises unittest.SkipTest, so the whole case is skipped.
    """

    def setUp(self):
        # this also functions as test_makeDefaultLatissButler(), but we may as
        # well catch the butler once it's made so it can be reused if needed,
        # given how hard it is to make it robustly

        # butler stuff
        try:
            self.butler = makeDefaultLatissButler()
        except FileNotFoundError:
            raise unittest.SkipTest("Skipping tests that require the LATISS butler repo.")
        self.assertIsInstance(self.butler, dafButler.Butler)

        # dict-like dataIds
        self.rawDataId = getMostRecentDataId(self.butler)
        self.fullId = fillDataId(self.butler, self.rawDataId)
        self.assertIn('exposure', self.fullId)
        self.assertIn('day_obs', self.fullId)
        self.assertIn('seq_num', self.fullId)
        self.expIdOnly = {'exposure': self.fullId['exposure'], 'detector': 0}
        self.dayObsSeqNumIdOnly = {'day_obs': getDayObs(self.fullId), 'seq_num': getSeqNum(self.fullId),
                                   'detector': 0}

        # expRecords
        self.expRecordNoDetector = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(self.expRecordNoDetector, dafButler.dimensions.DimensionRecord)
        self.assertFalse(hasattr(self.expRecordNoDetector, 'detector'))
        self.assertNotIn('detector', self.expRecordNoDetector.dataId)
        # just a crosscheck on the above to make sure other things are correct
        self.assertTrue(hasattr(self.expRecordNoDetector, 'instrument'))

        # data coordinates
        # popping here because butler.registry.expandDataId cannot have
        # day_obs or seq_num present right now
        rawDataIdNoDayObSeqNum = _assureDict(self.rawDataId)
        if dayObsKey := _get_dayobs_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(dayObsKey)
        if seqNumKey := _get_seqnum_key(rawDataIdNoDayObSeqNum):
            rawDataIdNoDayObSeqNum.pop(seqNumKey)
        self.rawDataIdNoDayObSeqNum = rawDataIdNoDayObSeqNum
        self.dataCoordMinimal = self.butler.registry.expandDataId(self.rawDataIdNoDayObSeqNum, detector=0)
        self.dataCoordFullView = self.butler.registry.expandDataId(self.rawDataIdNoDayObSeqNum,
                                                                   detector=0).full
        self.assertIsInstance(self.dataCoordMinimal, dafButler.dimensions.DataCoordinate)
        # NB the type check below is currently using a non-public API, but
        # at present there isn't a good alternative
        viewType = dafButler.core.dimensions._coordinate._DataCoordinateFullView
        self.assertIsInstance(self.dataCoordFullView, viewType)

    def test_getLatissDefaultCollections(self):
        defaultCollections = getLatissDefaultCollections()
        self.assertIsNotNone(defaultCollections)
        self.assertNotEqual(defaultCollections, [])
        self.assertGreaterEqual(len(defaultCollections), 1)

    def test_RECENT_DAY(self):
        todayInt = int(datetime.date.today().strftime("%Y%m%d"))
        self.assertLessEqual(RECENT_DAY, todayInt)  # in the past
        self.assertGreaterEqual(RECENT_DAY, 20200101)  # not too far in the past

        # check that the value of RECENT_DAY is before the end of the data.
        daysWithData = getDaysWithData(self.butler)
        self.assertLessEqual(RECENT_DAY, max(daysWithData))

        # no test here, but print a warning if it hasn't been updated recently
        recentDay_datetime = datetime.datetime.strptime(str(RECENT_DAY), "%Y%m%d")
        now = datetime.datetime.today()
        timeSinceUpdate = now - recentDay_datetime
        if timeSinceUpdate.days > 100:
            print(f"RECENT_DAY is now {timeSinceUpdate.days} days in the past. "
                  "You might want to consider updating this to speed up butler queries.")

    def test_sanitizeDayObs(self):
        """Valid day_obs values are converted to int; invalid ones raise."""
        dayObs = '2020-01-02'
        self.assertEqual(sanitizeDayObs(dayObs), 20200102)
        dayObs = 20210201
        self.assertEqual(sanitizeDayObs(dayObs), dayObs)

        # Each invalid input gets its own assertRaises context: a shared
        # context exits at the first raise, so any later call inside it would
        # never be executed.
        for badDayObs in (1.234, 'Febuary 29th, 1970'):
            with self.subTest(badDayObs=badDayObs):
                with self.assertRaises(ValueError):
                    sanitizeDayObs(badDayObs)

    def test_getMostRecentDayObs(self):
        # just a basic sanity check here as we can't know the value,
        # but at least check something is returned, and is plausible
        recentDay = getMostRecentDayObs(self.butler)
        self.assertIsInstance(recentDay, int)
        self.assertGreaterEqual(recentDay, RECENT_DAY)
        # some test data might be set a millennium in the future, i.e.
        # the year would be 2XXX+1000, so set to y4k just in case
        self.assertLess(recentDay, 40000000)

    def test_getSeqNumsForDayObs(self):
        emptyDay = 19990101
        seqnums = getSeqNumsForDayObs(self.butler, emptyDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertEqual(len(list(seqnums)), 0)

        recentDay = getMostRecentDayObs(self.butler)
        seqnums = getSeqNumsForDayObs(self.butler, recentDay)
        self.assertIsInstance(seqnums, Iterable)
        self.assertGreaterEqual(len(list(seqnums)), 1)

    def test_getMostRecentDataId(self):
        # we can't know the values, but it should always return something
        # and the dict and int forms should always have certain keys and agree
        dataId = getMostRecentDataId(self.butler)
        self.assertIsInstance(dataId, dict)
        self.assertIn('day_obs', dataId)
        self.assertIn('seq_num', dataId)
        self.assertTrue('exposure' in dataId or 'exposure.id' in dataId)

    def test_getDatasetRefForDataId(self):
        """A DatasetRef is returned for every supported flavour of dataId."""
        # DatasetRef is referenced through the package's public namespace
        # rather than the internal ``core.datasets.ref`` module path.
        dataIds = [self.rawDataId, self.rawDataIdNoDayObSeqNum, self.dataCoordMinimal,
                   self.dataCoordFullView]
        for dataId in dataIds:
            with self.subTest(dataIdType=type(dataId).__name__):
                dRef = getDatasetRefForDataId(self.butler, 'raw', dataId)
                self.assertIsInstance(dRef, dafButler.DatasetRef)

    def test__dayobs_present(self):
        goods = [{'day_obs': 123}, {'exposure.day_obs': 234}, {'day_obs': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_dayobs_present(good))
        for bad in bads:
            self.assertFalse(_dayobs_present(bad))

    def test__seqnum_present(self):
        goods = [{'seq_num': 123}, {'exposure.seq_num': 234}, {'seq_num': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_seqnum_present(good))
        for bad in bads:
            self.assertFalse(_seqnum_present(bad))

    def test__expid_present(self):
        goods = [{'exposure': 123}, {'exposure.id': 234}, {'exposure.id': 345, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertTrue(_expid_present(good))
        for bad in bads:
            self.assertFalse(_expid_present(bad))

    def test_getDayObs(self):
        dayVal = 98765
        goods = [{'day_obs': dayVal}, {'exposure.day_obs': dayVal}, {'day_obs': dayVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getDayObs(good), dayVal)
        for bad in bads:
            self.assertIsNone(getDayObs(bad))

    def test_getSeqNum(self):
        seqVal = 12345
        goods = [{'seq_num': seqVal}, {'exposure.seq_num': seqVal}, {'seq_num': seqVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getSeqNum(good), seqVal)
        for bad in bads:
            self.assertIsNone(getSeqNum(bad))

    def test_getExpId(self):
        expIdVal = 12345
        goods = [{'exposure': expIdVal}, {'exposure.id': expIdVal}, {'exposure': expIdVal, 'otherkey': -1}]
        bads = [{'different_key': 123}]
        for good in goods:
            self.assertEqual(getExpId(good), expIdVal)
        for bad in bads:
            self.assertIsNone(getExpId(bad))

    def test_datasetExists(self):
        self.assertTrue(datasetExists(self.butler, 'raw', self.rawDataId))
        self.assertTrue(datasetExists(self.butler, 'raw', self.expIdOnly))
        self.assertTrue(datasetExists(self.butler, 'raw', self.dayObsSeqNumIdOnly))

    def test_sortRecordsByDayObsThenSeqNum(self):
        """Records come back ordered by seq_num, and duplicates raise."""
        where = "exposure.day_obs=day_obs"
        expRecords = self.butler.registry.queryDimensionRecords("exposure", where=where,
                                                                bind={'day_obs': RECENT_DAY})
        expRecords = list(expRecords)
        self.assertGreaterEqual(len(expRecords), 1)  # just ensure we're not doing a no-op test
        random.shuffle(expRecords)  # they are often already in order, so make sure they're not
        sortedIds = sortRecordsByDayObsThenSeqNum(expRecords)
        for earlier, later in zip(sortedIds, sortedIds[1:]):
            self.assertLess(earlier.seq_num, later.seq_num)

        # Check that ambiguous sorts raise as expected. Only the call under
        # test sits inside the context manager, so a ValueError raised while
        # preparing the input cannot be mistaken for the expected one.
        recordsWithDuplicate = [*expRecords, expRecords[0]]  # add a duplicate
        with self.assertRaises(ValueError):
            sortRecordsByDayObsThenSeqNum(recordsWithDuplicate)

    def test_getDaysWithData(self):
        days = getDaysWithData(self.butler)
        # must be non-empty, otherwise the element check below cannot run
        self.assertGreater(len(days), 0)
        self.assertIsInstance(days[0], int)

    def test_getExpIdFromDayObsSeqNum(self):
        expId = getExpIdFromDayObsSeqNum(self.butler, self.dayObsSeqNumIdOnly)
        self.assertTrue(_expid_present(expId))

    def test_updateDataIdOrDataCord(self):
        updateVals = {'testKey': 'testValue'}

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for originalId in ids:
            testId = updateDataIdOrDataCord(originalId, **updateVals)
            for k, v in updateVals.items():
                self.assertEqual(testId[k], v)

    def test_fillDataId(self):
        self.assertFalse(_dayobs_present(self.expIdOnly))
        self.assertFalse(_seqnum_present(self.expIdOnly))

        fullId = fillDataId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(fullId))
        self.assertTrue(_seqnum_present(fullId))

        ids = [self.rawDataId, self.expRecordNoDetector, self.dataCoordMinimal, self.dataCoordFullView]
        for dataId in ids:
            fullId = fillDataId(self.butler, dataId)
            self.assertTrue(_dayobs_present(fullId))
            self.assertTrue(_seqnum_present(fullId))
            self.assertTrue(_expid_present(fullId))

    def test_getExpRecordFromDataId(self):
        record = getExpRecordFromDataId(self.butler, self.rawDataId)
        self.assertIsInstance(record, dafButler.dimensions.DimensionRecord)

    def test_getDayObsSeqNumFromExposureId(self):
        dayObsSeqNum = getDayObsSeqNumFromExposureId(self.butler, self.expIdOnly)
        self.assertTrue(_dayobs_present(dayObsSeqNum))
        self.assertTrue(_seqnum_present(dayObsSeqNum))

    def test_removeDataProduct(self):
        # Can't think of an easy or safe test for this
        return

    def test_getLatissOnSkyDataIds(self):
        # This is very slow, consider removing as it's the least important of
        # all the util functions. However, restricting it to only the most
        # recent day does help a lot, so probably OK like that, and should
        # speed up with middleware improvements in the future, and we should
        # ensure that they don't break this, so inclined to leave for now
        dayToUse = getDaysWithData(self.butler)[-1]
        # the most recent day with data might only be biases or flats so make
        # sure to override the default of skipping biases, darks & flats
        skipTypes = ()
        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])

        ids = getLatissOnSkyDataIds(self.butler, skipTypes=skipTypes, startDate=dayToUse, endDate=dayToUse,
                                    full=True)
        self.assertGreater(len(ids), 0)
        self.assertIsNotNone(ids[0])
        testId = ids[0]
        self.assertTrue(_dayobs_present(testId))
        self.assertTrue(_seqnum_present(testId))
        self.assertTrue(_expid_present(testId))

    def test__assureDict(self):
        for item in [self.rawDataId, self.fullId, self.expIdOnly,
                     self.expRecordNoDetector, self.dataCoordFullView,
                     self.dataCoordMinimal, self.rawDataIdNoDayObSeqNum]:
            testId = _assureDict(item)
            self.assertIsInstance(testId, dict)

    def test__get_dayobs_key(self):
        dataId = {'a_random_key': 321, 'exposure.day_obs': 20200312, 'z_random_key': 'abc'}
        self.assertEqual(_get_dayobs_key(dataId), 'exposure.day_obs')
        dataId = {'day_obs': 20200312}
        self.assertEqual(_get_dayobs_key(dataId), 'day_obs')
        dataId = {'missing': 20200312}
        self.assertIsNone(_get_dayobs_key(dataId))

    def test__get_seqnum_key(self):
        dataId = {'a_random_key': 321, 'exposure.seq_num': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_seqnum_key(dataId), 'exposure.seq_num')
        dataId = {'seq_num': 123}
        self.assertEqual(_get_seqnum_key(dataId), 'seq_num')
        dataId = {'missing': 123}
        self.assertIsNone(_get_seqnum_key(dataId))

    def test__get_expid_key(self):
        dataId = {'a_random_key': 321, 'exposure.id': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_expid_key(dataId), 'exposure.id')
        dataId = {'a_random_key': 321, 'exposure': 123, 'z_random_key': 'abc'}
        self.assertEqual(_get_expid_key(dataId), 'exposure')
        dataId = {'missing': 123}
        self.assertIsNone(_get_expid_key(dataId))

    def test_updateDataId(self):
        # check with a dataCoordinate
        dataId = copy.copy(self.expRecordNoDetector.dataId)
        self.assertNotIn('detector', dataId)
        dataId = updateDataId(dataId, detector=123)
        self.assertIn('detector', dataId)
        self.assertEqual(dataId['detector'], 123)

        # check with a dict
        self.assertIsInstance(self.rawDataId, dict)
        dataId = copy.copy(self.rawDataId)
        dataId.pop('detector')
        self.assertNotIn('detector', dataId)
        dataId = updateDataId(dataId, detector=321)
        self.assertIn('detector', dataId)
        self.assertEqual(dataId['detector'], 321)

    def test_getExpRecord(self):
        expId = self.expIdOnly['exposure']
        dayObs = self.dayObsSeqNumIdOnly['day_obs']
        seqNum = self.dayObsSeqNumIdOnly['seq_num']

        recordByExpId = getExpRecord(self.butler, 'LATISS', expId=expId)
        self.assertIsInstance(recordByExpId, dafButler.dimensions.DimensionRecord)

        recordByDayObsSeqNum = getExpRecord(self.butler, 'LATISS', dayObs=dayObs, seqNum=seqNum)
        self.assertIsInstance(recordByDayObsSeqNum, dafButler.dimensions.DimensionRecord)
        self.assertEqual(recordByExpId, recordByDayObsSeqNum)

        with self.assertRaises(ValueError):
            # because we need dayObs too, so immediate raise due to bad args
            getExpRecord(self.butler, 'LATISS', seqNum=seqNum)

        with self.assertRaises(RuntimeError):
            # (dayObs, seqNum) no longer matches the expId, so there are no
            # results, which is a RuntimeError
            getExpRecord(self.butler, 'LATISS', expId=expId, dayObs=dayObs, seqNum=seqNum+1)

416 

417 

class ButlerInitTestCase(lsst.utils.tests.TestCase):
    """Separately test whether we can make a butler with the env var set
    and that the expected error type is raised and passed through when it is
    not, as this is relied upon to correctly skip tests when butler init is
    not possible.
    """

    def test_dafButlerRaiseTypes(self):
        # Imported explicitly: ``import unittest`` on its own does not load
        # the ``unittest.mock`` submodule, so relying on the attribute only
        # works if some other module happened to import it first.
        from unittest import mock

        # If DAF_BUTLER_REPOSITORY_INDEX is not set *at all* then
        # using an instrument label raises a FileNotFoundError.
        # patch.dict restores the original environment on exit.
        with mock.patch.dict(os.environ):
            os.environ.pop('DAF_BUTLER_REPOSITORY_INDEX', None)
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('LATISS')

        # If DAF_BUTLER_REPOSITORY_INDEX is present but is just an empty
        # string then using a label is also expected to raise a
        # FileNotFoundError
        with mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": ''}):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('LATISS')

        # If DAF_BUTLER_REPOSITORY_INDEX _is_ set, we can't rely on any given
        # camera existing, but we can check that we get the expected error
        # when trying to init an instrument which definitely won't be defined.
        if os.getenv('DAF_BUTLER_REPOSITORY_INDEX'):
            with self.assertRaises(FileNotFoundError):
                dafButler.Butler('NotAValidCameraName')

    def test_makeDefaultLatissButlerRaiseTypes(self):
        """makeDefaultLatissButler unifies the mixed exception types from
        butler inits, so test all available possibilities here.
        """
        # see test_dafButlerRaiseTypes for why this import is explicit
        from unittest import mock

        # variable not set at all
        with mock.patch.dict(os.environ):
            os.environ.pop('DAF_BUTLER_REPOSITORY_INDEX', None)
            with self.assertRaises(FileNotFoundError):
                makeDefaultLatissButler()

        # variable set to an empty string, then to a path that does not exist
        fakeFile = '/path/to/a/file/which/does/not_exist.yaml'
        for indexValue in ('', fakeFile):
            with self.subTest(DAF_BUTLER_REPOSITORY_INDEX=indexValue):
                with mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": indexValue}):
                    with self.assertRaises(FileNotFoundError):
                        makeDefaultLatissButler()

    def test_DAF_BUTLER_REPOSITORY_INDEX_value(self):
        # If DAF_BUTLER_REPOSITORY_INDEX is truthy then we expect it to point
        # to an actual file
        repoFile = os.getenv('DAF_BUTLER_REPOSITORY_INDEX')
        if repoFile:
            self.assertTrue(ResourcePath(repoFile).exists())

472 

473 

class TestMemory(lsst.utils.tests.MemoryTestCase):
    """Inherit the checks of lsst.utils.tests.MemoryTestCase unchanged."""

476 

477 

def setup_module(module):
    """Initialise the lsst.utils.tests machinery for this module.

    Presumably invoked by the test runner as its module-level setup hook
    (TODO confirm); the ``module`` argument is accepted but not used.
    """
    lsst.utils.tests.init()

480 

481 

# Allow the file to be executed directly as a script: perform the same
# lsst.utils.tests initialisation as setup_module() and then hand over to
# unittest's command-line runner.
if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()