Coverage for tests/test_quantumBackedButler.py: 7%

207 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-10-28 09:59 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import json 

23import os 

24import unittest 

25import uuid 

26 

27from lsst.daf.butler import ( 

28 Butler, 

29 Config, 

30 DatasetRef, 

31 DatasetType, 

32 DimensionUniverse, 

33 Quantum, 

34 QuantumBackedButler, 

35 QuantumProvenanceData, 

36 Registry, 

37 RegistryConfig, 

38 StorageClass, 

39) 

40from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

41 

42TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

43 

44 

class QuantumBackedButlerTestCase(unittest.TestCase):
    """Test case for QuantumBackedButler.

    Each test runs against a fresh temporary repository created in
    `setUp`, which holds four stored input datasets, one stored init-input
    dataset, and unstored references for two kinds of outputs plus a set of
    references that are never written ("missing").
    """

    def setUp(self) -> None:
        """Create a temporary repository and populate it with test data."""
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config()
        self.config["root"] = self.root
        self.universe = DimensionUniverse()

        # Make a butler and import dimension definitions.
        registryConfig = RegistryConfig(self.config.get("registry"))
        Registry.createFromConfig(registryConfig, butlerRoot=self.root)
        self.butler = Butler(self.config, writeable=True, run="RUN")
        self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Make all dataset types; they share dimensions and storage class.
        graph = self.universe.extract(("instrument", "detector"))
        storageClass = StorageClass("StructuredDataDict")
        self.datasetTypeInit = DatasetType("test_ds_init", graph, storageClass)
        self.datasetTypeInput = DatasetType("test_ds_input", graph, storageClass)
        self.datasetTypeOutput = DatasetType("test_ds_output", graph, storageClass)
        self.datasetTypeOutput2 = DatasetType("test_ds_output2", graph, storageClass)
        self.datasetTypeExtra = DatasetType("test_ds_extra", graph, storageClass)
        for dataset_type in (
            self.datasetTypeInit,
            self.datasetTypeInput,
            self.datasetTypeOutput,
            self.datasetTypeOutput2,
            self.datasetTypeExtra,
        ):
            self.butler.registry.registerDatasetType(dataset_type)

        dataIds = [
            self.butler.registry.expandDataId(dict(instrument="Cam1", detector=detector_id))
            for detector_id in (1, 2, 3, 4)
        ]

        # Make actual input datasets; the payload records the detector so
        # that tests can check which dataset was read back.
        self.input_refs = [
            self.butler.put({"data": dataId["detector"]}, self.datasetTypeInput, dataId) for dataId in dataIds
        ]
        self.init_inputs_refs = [self.butler.put({"data": -1}, self.datasetTypeInit, dataIds[0])]

        # Generate dataset refs for outputs; nothing is stored for these yet.
        self.output_refs = [
            DatasetRef(self.datasetTypeOutput, dataId, id=uuid.uuid4(), run="RUN") for dataId in dataIds
        ]
        self.output_refs2 = [
            DatasetRef(self.datasetTypeOutput2, dataId, id=uuid.uuid4(), run="RUN") for dataId in dataIds
        ]

        # Refs that are never written by any test.
        self.missing_refs = [
            DatasetRef(self.datasetTypeExtra, dataId, id=uuid.uuid4(), run="RUN") for dataId in dataIds
        ]

    def tearDown(self) -> None:
        """Remove the temporary repository created by `setUp`."""
        removeTestTempDir(self.root)

    def make_quantum(self, step: int = 1) -> Quantum:
        """Make a Quantum which includes datastore records.

        Parameters
        ----------
        step : `int`, optional
            Which quantum of the two-step test pipeline to build: ``1``
            reads the stored inputs and init-inputs and predicts
            ``output_refs``; ``2`` reads ``output_refs`` and predicts
            ``output_refs2``.

        Returns
        -------
        quantum : `Quantum`
            The quantum for the requested step.

        Raises
        ------
        ValueError
            Raised if ``step`` is neither 1 nor 2.
        """
        if step == 1:
            datastore_records = self.butler.datastore.export_records(self.input_refs + self.init_inputs_refs)
            predictedInputs = {self.datasetTypeInput: self.input_refs}
            outputs = {self.datasetTypeOutput: self.output_refs}
            initInputs = {self.datasetTypeInit: self.init_inputs_refs}
        elif step == 2:
            # The result should be empty, this is just to test that it works.
            datastore_records = self.butler.datastore.export_records(self.output_refs)
            # Key the inputs by the dataset type the refs actually have.
            predictedInputs = {self.datasetTypeOutput: self.output_refs}
            outputs = {self.datasetTypeOutput2: self.output_refs2}
            initInputs = {}
        else:
            # Fail with a clear message instead of an unbound-local error.
            raise ValueError(f"Unsupported step {step!r}; expected 1 or 2.")

        return Quantum(
            taskName="some.task.name",
            inputs=predictedInputs,
            outputs=outputs,
            initInputs=initInputs,
            datastore_records=datastore_records,
        )

    @staticmethod
    def _ids(refs) -> set:
        """Return the set of ``id`` attributes of the given dataset refs."""
        return {ref.id for ref in refs}

    def _initialize_qbb(self, quantum):
        """Initialize a QuantumBackedButler for ``quantum`` in the test repo."""
        return QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

    def test_initialize(self) -> None:
        """Test for initialize method"""
        qbb = self._initialize_qbb(self.make_quantum())

        # Check state after initialize: predictions are populated from the
        # quantum, while nothing has been read or written yet.
        self.assertTrue(qbb.isWriteable())
        self.assertEqual(qbb._predicted_inputs, self._ids(self.input_refs))
        self.assertEqual(qbb._predicted_outputs, self._ids(self.output_refs))
        self.assertEqual(qbb._available_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._actual_output_refs, set())

    def test_getPutDirect(self) -> None:
        """Test for getDirect/putDirect methods"""
        qbb = self._initialize_qbb(self.make_quantum())

        # Verify all input data are readable.
        for ref in self.input_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": -1})
        for ref in self.missing_refs:
            with self.assertRaises(FileNotFoundError):
                qbb.getDirect(ref)

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._unavailable_inputs, self._ids(self.missing_refs))

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.putDirect({"data": ref.dataId["detector"] ** 2}, ref)

        # Must be able to read them back.
        for ref in self.output_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"] ** 2})

        self.assertEqual(qbb._actual_output_refs, set(self.output_refs))

    def test_getDirectDeferred(self) -> None:
        """Test for getDirectDeferred method"""
        qbb = self._initialize_qbb(self.make_quantum())

        # Get some input data (deliberately not all of it).
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            data = qbb.getDirectDeferred(ref)
            self.assertEqual(data.get(), {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDirectDeferred(ref)
            self.assertEqual(data.get(), {"data": -1})
        for ref in self.missing_refs:
            # The handle is returned for a missing dataset; the failure is
            # only expected once the data is actually requested.
            data = qbb.getDirectDeferred(ref)
            with self.assertRaises(FileNotFoundError):
                data.get()

        # _available_inputs is not the full set of predicted inputs here:
        # only the refs that were requested above are expected in it.
        requested_ids = self._ids(input_refs)
        self.assertEqual(qbb._available_inputs, requested_ids)
        self.assertEqual(qbb._actual_inputs, requested_ids)
        self.assertEqual(qbb._unavailable_inputs, self._ids(self.missing_refs))

    def test_datasetExistsDirect(self) -> None:
        """Test for datasetExistsDirect method"""
        qbb = self._initialize_qbb(self.make_quantum())

        # Check existence of some input data (deliberately not all of it).
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            self.assertTrue(qbb.datasetExistsDirect(ref))
        for ref in self.init_inputs_refs:
            self.assertTrue(qbb.datasetExistsDirect(ref))
        for ref in self.missing_refs:
            self.assertFalse(qbb.datasetExistsDirect(ref))

        # _available_inputs is not the full set of predicted inputs here:
        # only the checked refs are expected in it, and an existence check
        # is not expected to count as actually using an input.
        self.assertEqual(qbb._available_inputs, self._ids(input_refs))
        self.assertEqual(qbb._actual_inputs, set())
        # NOTE: this is not consistent with getDirect, where missing refs
        # end up in _unavailable_inputs?
        self.assertEqual(qbb._unavailable_inputs, set())

    def test_markInputUnused(self) -> None:
        """Test for markInputUnused method"""
        qbb = self._initialize_qbb(self.make_quantum())

        # Read all input data so that every input counts as actually used.
        for ref in self.input_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": -1})

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)

        # Marking one input unused must drop it from the actual inputs.
        qbb.markInputUnused(self.input_refs[0])
        self.assertEqual(qbb._actual_inputs, self._ids(self.input_refs[1:]))

    def test_pruneDatasets(self) -> None:
        """Test for pruneDatasets methods"""
        qbb = self._initialize_qbb(self.make_quantum())

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.putDirect({"data": ref.dataId["detector"] ** 2}, ref)

        # Must be able to read them back.
        for ref in self.output_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"] ** 2})

        # Check for invalid arguments.
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without disassociate=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=False, unstore=True, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without unstore=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=False, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass disassociate=True without purge=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=True, purge=False)

        # Unstore only.
        ref = self.output_refs[0]
        qbb.pruneDatasets([ref], disassociate=False, unstore=True, purge=False)
        self.assertFalse(qbb.datasetExistsDirect(ref))
        with self.assertRaises(FileNotFoundError):
            qbb.getDirect(ref)

        # Can store it again.
        qbb.putDirect({"data": ref.dataId["detector"] ** 2}, ref)
        self.assertTrue(qbb.datasetExistsDirect(ref))

        # Purge completely.
        ref = self.output_refs[1]
        qbb.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)
        self.assertFalse(qbb.datasetExistsDirect(ref))
        with self.assertRaises(FileNotFoundError):
            qbb.getDirect(ref)

        # A purged dataset can be stored again as well.
        qbb.putDirect({"data": ref.dataId["detector"] ** 2}, ref)
        self.assertTrue(qbb.datasetExistsDirect(ref))

    def test_extract_provenance_data(self) -> None:
        """Test for extract_provenance_data method"""
        qbb = self._initialize_qbb(self.make_quantum())

        # Read/store everything.
        for ref in self.input_refs:
            qbb.getDirect(ref)
        for ref in self.init_inputs_refs:
            qbb.getDirect(ref)
        for ref in self.output_refs:
            qbb.putDirect({"data": ref.dataId["detector"] ** 2}, ref)

        # Check both the extracted provenance and a copy that has gone
        # through a JSON round trip.
        provenance1 = qbb.extract_provenance_data()
        prov_json = provenance1.json()
        provenance2 = QuantumProvenanceData.direct(**json.loads(prov_json))

        # Expected values do not depend on which copy is being checked.
        input_ids = self._ids(self.input_refs)
        output_ids = self._ids(self.output_refs)
        datastore_name = "FileDatastore@<butlerRoot>/datastore"
        class_name = "lsst.daf.butler.core.storedFileInfo.StoredFileInfo"
        table_name = "file_datastore_records"

        for provenance in (provenance1, provenance2):
            self.assertEqual(provenance.predicted_inputs, input_ids)
            self.assertEqual(provenance.available_inputs, input_ids)
            self.assertEqual(provenance.actual_inputs, input_ids)
            self.assertEqual(provenance.predicted_outputs, output_ids)
            self.assertEqual(provenance.actual_outputs, output_ids)
            self.assertEqual(set(provenance.datastore_records.keys()), {datastore_name})
            # Datastore records are expected for the written outputs only.
            datastore_records = provenance.datastore_records[datastore_name]
            self.assertEqual(set(datastore_records.dataset_ids), output_ids)
            self.assertEqual(set(datastore_records.records.keys()), {class_name})
            self.assertEqual(set(datastore_records.records[class_name].keys()), {table_name})
            self.assertEqual(
                {record["dataset_id"] for record in datastore_records.records[class_name][table_name]},
                output_ids,
            )

    def test_collect_and_transfer(self) -> None:
        """Test for collect_and_transfer method"""
        quantum1 = self.make_quantum(1)
        qbb1 = self._initialize_qbb(quantum1)

        quantum2 = self.make_quantum(2)
        qbb2 = self._initialize_qbb(quantum2)

        # Read/store everything for the first quantum.
        for ref in self.input_refs:
            qbb1.getDirect(ref)
        for ref in self.init_inputs_refs:
            qbb1.getDirect(ref)
        for ref in self.output_refs:
            qbb1.putDirect({"data": ref.dataId["detector"] ** 2}, ref)

        # The second quantum consumes the outputs of the first one.
        for ref in self.output_refs:
            qbb2.getDirect(ref)
        for ref in self.output_refs2:
            qbb2.putDirect({"data": ref.dataId["detector"] ** 3}, ref)

        QuantumProvenanceData.collect_and_transfer(
            self.butler,
            [quantum1, quantum2],
            [qbb1.extract_provenance_data(), qbb2.extract_provenance_data()],
        )

        # After the transfer the outputs of both quanta must be readable
        # through the full butler.
        for ref in self.output_refs:
            data = self.butler.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"] ** 2})

        for ref in self.output_refs2:
            data = self.butler.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"] ** 3})

354 

if __name__ == "__main__":
    # Run the test suite when this module is executed as a script.
    unittest.main()