Coverage for tests/test_quantumBackedButler.py: 7%

216 statements

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import json
import os
import unittest
import uuid
from typing import cast

from lsst.daf.butler import (
    Butler,
    Config,
    DatasetRef,
    DatasetType,
    DimensionUniverse,
    Quantum,
    QuantumBackedButler,
    QuantumProvenanceData,
    Registry,
    RegistryConfig,
    StorageClass,
)
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))


class QuantumBackedButlerTestCase(unittest.TestCase):
    """Test case for QuantumBackedButler."""

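    # The butler under test operates on the datastore records attached to a
    # Quantum rather than on a registry: setUp() creates real datasets with an
    # ordinary Butler, and each test builds a QuantumBackedButler for quanta
    # that reference those datasets.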

    def setUp(self) -> None:
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config()
        self.config["root"] = self.root
        self.universe = DimensionUniverse()

        # Make a butler and import dimension definitions.
        registryConfig = RegistryConfig(self.config.get("registry"))
        Registry.createFromConfig(registryConfig, butlerRoot=self.root)
        self.butler = Butler(self.config, writeable=True, run="RUN")
        self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Make all dataset types.
        graph = self.universe.extract(("instrument", "detector"))
        storageClass = StorageClass("StructuredDataDict")
        self.datasetTypeInit = DatasetType("test_ds_init", graph, storageClass)
        self.datasetTypeInput = DatasetType("test_ds_input", graph, storageClass)
        self.datasetTypeOutput = DatasetType("test_ds_output", graph, storageClass)
        self.datasetTypeOutput2 = DatasetType("test_ds_output2", graph, storageClass)
        self.datasetTypeExtra = DatasetType("test_ds_extra", graph, storageClass)
        self.butler.registry.registerDatasetType(self.datasetTypeInit)
        self.butler.registry.registerDatasetType(self.datasetTypeInput)
        self.butler.registry.registerDatasetType(self.datasetTypeOutput)
        self.butler.registry.registerDatasetType(self.datasetTypeOutput2)
        self.butler.registry.registerDatasetType(self.datasetTypeExtra)

        dataIds = [
            self.butler.registry.expandDataId(dict(instrument="Cam1", detector=detector_id))
            for detector_id in (1, 2, 3, 4)
        ]

        # Make actual input datasets.
        self.input_refs = [
            self.butler.put({"data": dataId["detector"]}, self.datasetTypeInput, dataId) for dataId in dataIds
        ]
        self.init_inputs_refs = [self.butler.put({"data": -1}, self.datasetTypeInit, dataIds[0])]
        self.all_input_refs = self.input_refs + self.init_inputs_refs

        # Generate dataset refs for outputs.
        self.output_refs = [
            DatasetRef(self.datasetTypeOutput, dataId, id=uuid.uuid4(), run="RUN") for dataId in dataIds
        ]
        self.output_refs2 = [
            DatasetRef(self.datasetTypeOutput2, dataId, id=uuid.uuid4(), run="RUN") for dataId in dataIds
        ]

        # Refs for a dataset type that is registered but never stored; these
        # exercise the missing-input paths in the tests below.
        self.missing_refs = [
            DatasetRef(self.datasetTypeExtra, dataId, id=uuid.uuid4(), run="RUN") for dataId in dataIds
        ]

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def make_quantum(self, step: int = 1) -> Quantum:
        """Make a Quantum which includes datastore records."""

        if step == 1:
            datastore_records = self.butler.datastore.export_records(self.all_input_refs)
            predictedInputs = {self.datasetTypeInput: self.input_refs}
            outputs = {self.datasetTypeOutput: self.output_refs}
            initInputs = {self.datasetTypeInit: self.init_inputs_refs[0]}
        elif step == 2:
            # The result should be empty; this is just to test that it works.
            datastore_records = self.butler.datastore.export_records(self.output_refs)
            predictedInputs = {self.datasetTypeInput: self.output_refs}
            outputs = {self.datasetTypeOutput2: self.output_refs2}
            initInputs = {}
        else:
            raise ValueError(f"unexpected {step} value")

        return Quantum(
            taskName="some.task.name",
            inputs=predictedInputs,
            outputs=outputs,
            initInputs=initInputs,
            datastore_records=datastore_records,
        )

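    # Most tests below build a Quantum with make_quantum(), construct a
    # QuantumBackedButler from it with initialize(), exercise the butler, and
    # then inspect its bookkeeping. step=1 models a quantum consuming the
    # datasets stored in setUp(); step=2 models a downstream quantum consuming
    # the first quantum's outputs (used in test_collect_and_transfer).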

    def test_initialize(self) -> None:
        """Test for initialize factory method"""

        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)
        self._test_factory(qbb)

    def test_from_predicted(self) -> None:
        """Test for from_predicted factory method"""

        datastore_records = self.butler.datastore.export_records(self.all_input_refs)
        qbb = QuantumBackedButler.from_predicted(
            config=self.config,
            predicted_inputs=[ref.getCheckedId() for ref in self.all_input_refs],
            predicted_outputs=[ref.getCheckedId() for ref in self.output_refs],
            dimensions=self.universe,
            datastore_records=datastore_records,
        )
        self._test_factory(qbb)

    def _test_factory(self, qbb: QuantumBackedButler) -> None:
        """Test state immediately after construction."""

        self.assertTrue(qbb.isWriteable())
        self.assertEqual(qbb._predicted_inputs, set(ref.id for ref in self.all_input_refs))
        self.assertEqual(qbb._predicted_outputs, set(ref.id for ref in self.output_refs))
        self.assertEqual(qbb._available_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._actual_output_refs, set())

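    # The _predicted_*, _available_*, _actual_*, and _unavailable_* attributes
    # checked above are the butler's internal provenance bookkeeping; the
    # tests below verify how it is updated as datasets are read and written.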

    def test_getPutDirect(self) -> None:
        """Test for getDirect/putDirect methods"""

        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # Verify all input data are readable.
        for ref in self.input_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": -1})
        for ref in self.missing_refs:
            with self.assertRaises(FileNotFoundError):
                data = qbb.getDirect(ref)

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._unavailable_inputs, set(ref.id for ref in self.missing_refs))

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back.
        for ref in self.output_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        self.assertEqual(qbb._actual_output_refs, set(self.output_refs))

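    # The next two tests check how deferred gets and existence checks update
    # the same input bookkeeping as getDirect().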

    def test_getDirectDeferred(self) -> None:
        """Test for getDirectDeferred method"""

        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # Get some input data.
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            data = qbb.getDirectDeferred(ref)
            self.assertEqual(data.get(), {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDirectDeferred(ref)
            self.assertEqual(data.get(), {"data": -1})
        for ref in self.missing_refs:
            data = qbb.getDirectDeferred(ref)
            with self.assertRaises(FileNotFoundError):
                data.get()

        # Only the refs that were actually read are recorded as available and
        # actual inputs; the missing refs are recorded as unavailable.
        self.assertEqual(qbb._available_inputs, set(ref.id for ref in input_refs + self.init_inputs_refs))
        self.assertEqual(qbb._actual_inputs, set(ref.id for ref in input_refs + self.init_inputs_refs))
        self.assertEqual(qbb._unavailable_inputs, set(ref.id for ref in self.missing_refs))

    def test_datasetExistsDirect(self) -> None:
        """Test for datasetExistsDirect method"""

        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # Check existence of some input data.
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            exists = qbb.datasetExistsDirect(ref)
            self.assertTrue(exists)
        for ref in self.init_inputs_refs:
            exists = qbb.datasetExistsDirect(ref)
            self.assertTrue(exists)
        for ref in self.missing_refs:
            exists = qbb.datasetExistsDirect(ref)
            self.assertFalse(exists)

        # Existence checks mark the refs as available inputs, but do not mark
        # them as actually used or as unavailable.
        self.assertEqual(qbb._available_inputs, set(ref.id for ref in input_refs + self.init_inputs_refs))
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())  # this is not consistent with getDirect?

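    # markInputUnused() reports that a predicted input, although available,
    # was not actually used; the test below checks that the ref is removed
    # from the actual-input bookkeeping.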

    def test_markInputUnused(self) -> None:
        """Test for markInputUnused method"""

        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # Read all the input data so it is recorded as available and actual.
        for ref in self.input_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": -1})

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)

        qbb.markInputUnused(self.input_refs[0])
        self.assertEqual(
            qbb._actual_inputs, set(ref.id for ref in self.input_refs[1:] + self.init_inputs_refs)
        )

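    # For pruneDatasets(), the invalid flag combinations are rejected with
    # TypeError; the supported operations exercised below are unstoring a
    # dataset and purging it completely.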

    def test_pruneDatasets(self) -> None:
        """Test for pruneDatasets methods"""

        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back.
        for ref in self.output_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        # Check for invalid arguments.
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without disassociate=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=False, unstore=True, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without unstore=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=False, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass disassociate=True without purge=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=True, purge=False)

        # Unstore only.
        ref = self.output_refs[0]
        qbb.pruneDatasets([ref], disassociate=False, unstore=True, purge=False)
        self.assertFalse(qbb.datasetExistsDirect(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.getDirect(ref)

        # Can store it again.
        qbb.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.datasetExistsDirect(ref))

        # Purge completely.
        ref = self.output_refs[1]
        qbb.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)
        self.assertFalse(qbb.datasetExistsDirect(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.getDirect(ref)
        qbb.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.datasetExistsDirect(ref))

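    # extract_provenance_data() packages the butler's bookkeeping and its
    # datastore records into a QuantumProvenanceData object; the test below
    # also checks that it survives a round trip through JSON.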

    def test_extract_provenance_data(self) -> None:
        """Test for extract_provenance_data method"""

        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # Read/store everything.
        for ref in self.input_refs:
            qbb.getDirect(ref)
        for ref in self.init_inputs_refs:
            qbb.getDirect(ref)
        for ref in self.output_refs:
            qbb.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        provenance1 = qbb.extract_provenance_data()
        prov_json = provenance1.json()
        provenance2 = QuantumProvenanceData.direct(**json.loads(prov_json))
        for provenance in (provenance1, provenance2):
            input_ids = set(ref.id for ref in self.input_refs + self.init_inputs_refs)
            self.assertEqual(provenance.predicted_inputs, input_ids)
            self.assertEqual(provenance.available_inputs, input_ids)
            self.assertEqual(provenance.actual_inputs, input_ids)
            output_ids = set(ref.id for ref in self.output_refs)
            self.assertEqual(provenance.predicted_outputs, output_ids)
            self.assertEqual(provenance.actual_outputs, output_ids)
            datastore_name = "FileDatastore@<butlerRoot>/datastore"
            self.assertEqual(set(provenance.datastore_records.keys()), {datastore_name})
            datastore_records = provenance.datastore_records[datastore_name]
            self.assertEqual(set(datastore_records.dataset_ids), output_ids)
            class_name = "lsst.daf.butler.core.storedFileInfo.StoredFileInfo"
            table_name = "file_datastore_records"
            self.assertEqual(set(datastore_records.records.keys()), {class_name})
            self.assertEqual(set(datastore_records.records[class_name].keys()), {table_name})
            self.assertEqual(
                set(record["dataset_id"] for record in datastore_records.records[class_name][table_name]),
                output_ids,
            )

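    # collect_and_transfer() takes the provenance extracted from one or more
    # quantum-backed butlers and transfers the corresponding outputs and
    # datastore records into a full Butler, after which the datasets can be
    # read back through that butler.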

    def test_collect_and_transfer(self) -> None:
        """Test for collect_and_transfer method"""

        quantum1 = self.make_quantum(1)
        qbb1 = QuantumBackedButler.initialize(config=self.config, quantum=quantum1, dimensions=self.universe)

        quantum2 = self.make_quantum(2)
        qbb2 = QuantumBackedButler.initialize(config=self.config, quantum=quantum2, dimensions=self.universe)

        # Read/store everything.
        for ref in self.input_refs:
            qbb1.getDirect(ref)
        for ref in self.init_inputs_refs:
            qbb1.getDirect(ref)
        for ref in self.output_refs:
            qbb1.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        for ref in self.output_refs:
            qbb2.getDirect(ref)
        for ref in self.output_refs2:
            qbb2.putDirect({"data": cast(int, ref.dataId["detector"]) ** 3}, ref)

        QuantumProvenanceData.collect_and_transfer(
            self.butler,
            [quantum1, quantum2],
            [qbb1.extract_provenance_data(), qbb2.extract_provenance_data()],
        )

        for ref in self.output_refs:
            data = self.butler.getDirect(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        for ref in self.output_refs2:
            data = self.butler.getDirect(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 3})


if __name__ == "__main__":
    unittest.main()