Coverage for tests/test_quantumBackedButler.py: 7%

213 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-03 09:15 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

21 

import json
import os
import unittest
from typing import cast

from lsst.daf.butler import (
    Butler,
    Config,
    DatasetRef,
    DatasetType,
    DimensionUniverse,
    Quantum,
    QuantumBackedButler,
    QuantumProvenanceData,
    Registry,
    RegistryConfig,
    StorageClass,
)
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir

# Directory containing this test module; used to locate test data files.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

43 

44 

class QuantumBackedButlerTestCase(unittest.TestCase):
    """Test case for QuantumBackedButler, the limited butler backed by a
    quantum's datastore records rather than a full registry.
    """

    def setUp(self) -> None:
        """Create a temporary repo with registered dataset types and a few
        stored input datasets.
        """
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config()
        self.config["root"] = self.root
        self.universe = DimensionUniverse()

        # Make a full butler and import dimension definitions into it.
        registryConfig = RegistryConfig(self.config.get("registry"))
        Registry.createFromConfig(registryConfig, butlerRoot=self.root)
        self.butler = Butler(self.config, writeable=True, run="RUN")
        self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Define and register every dataset type used by the tests.
        graph = self.universe.extract(("instrument", "detector"))
        storageClass = StorageClass("StructuredDataDict")
        self.datasetTypeInit = DatasetType("test_ds_init", graph, storageClass)
        self.datasetTypeInput = DatasetType("test_ds_input", graph, storageClass)
        self.datasetTypeOutput = DatasetType("test_ds_output", graph, storageClass)
        self.datasetTypeOutput2 = DatasetType("test_ds_output2", graph, storageClass)
        self.datasetTypeExtra = DatasetType("test_ds_extra", graph, storageClass)

        self.dataset_types: dict[str, DatasetType] = {}
        for dataset_type in (
            self.datasetTypeInit,
            self.datasetTypeInput,
            self.datasetTypeOutput,
            self.datasetTypeOutput2,
            self.datasetTypeExtra,
        ):
            self.butler.registry.registerDatasetType(dataset_type)
            self.dataset_types[dataset_type.name] = dataset_type

        dataIds = [
            self.butler.registry.expandDataId(dict(instrument="Cam1", detector=detector_id))
            for detector_id in (1, 2, 3, 4)
        ]

        # Store the actual input datasets.
        self.input_refs = [
            self.butler.put({"data": dataId["detector"]}, self.datasetTypeInput, dataId)
            for dataId in dataIds
        ]
        self.init_inputs_refs = [self.butler.put({"data": -1}, self.datasetTypeInit, dataIds[0])]
        self.all_input_refs = self.input_refs + self.init_inputs_refs

        # Predicted (not-yet-stored) refs for outputs, and refs for datasets
        # that will never exist at all.
        self.output_refs = [DatasetRef(self.datasetTypeOutput, dataId, run="RUN") for dataId in dataIds]
        self.output_refs2 = [DatasetRef(self.datasetTypeOutput2, dataId, run="RUN") for dataId in dataIds]
        self.missing_refs = [DatasetRef(self.datasetTypeExtra, dataId, run="RUN") for dataId in dataIds]

    def tearDown(self) -> None:
        """Remove the temporary repository."""
        removeTestTempDir(self.root)

    def make_quantum(self, step: int = 1) -> Quantum:
        """Make a Quantum which includes datastore records.

        ``step=1`` builds a quantum that reads the stored inputs; ``step=2``
        builds a follow-up quantum that reads step 1's (predicted) outputs.
        """
        if step == 1:
            datastore_records = self.butler.datastore.export_records(self.all_input_refs)
            predicted_inputs = {self.datasetTypeInput: self.input_refs}
            outputs = {self.datasetTypeOutput: self.output_refs}
            init_inputs = {self.datasetTypeInit: self.init_inputs_refs[0]}
        elif step == 2:
            # The result should be empty, this is just to test that it works.
            datastore_records = self.butler.datastore.export_records(self.output_refs)
            predicted_inputs = {self.datasetTypeInput: self.output_refs}
            outputs = {self.datasetTypeOutput2: self.output_refs2}
            init_inputs = {}
        else:
            raise ValueError(f"unexpected {step} value")

        return Quantum(
            taskName="some.task.name",
            inputs=predicted_inputs,
            outputs=outputs,
            initInputs=init_inputs,
            datastore_records=datastore_records,
        )

    def _make_qbb(self, quantum: Quantum) -> QuantumBackedButler:
        """Construct a QuantumBackedButler for the given quantum."""
        return QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

    def test_initialize(self) -> None:
        """Test for initialize factory method."""
        qbb = self._make_qbb(self.make_quantum())
        self._test_factory(qbb)

    def test_from_predicted(self) -> None:
        """Test for from_predicted factory method."""
        datastore_records = self.butler.datastore.export_records(self.all_input_refs)
        qbb = QuantumBackedButler.from_predicted(
            config=self.config,
            predicted_inputs=[ref.getCheckedId() for ref in self.all_input_refs],
            predicted_outputs=[ref.getCheckedId() for ref in self.output_refs],
            dimensions=self.universe,
            datastore_records=datastore_records,
            dataset_types=self.dataset_types,
        )
        self._test_factory(qbb)

    def _test_factory(self, qbb: QuantumBackedButler) -> None:
        """Check the state of a freshly constructed QuantumBackedButler."""
        self.assertTrue(qbb.isWriteable())
        self.assertEqual(qbb._predicted_inputs, {ref.id for ref in self.all_input_refs})
        self.assertEqual(qbb._predicted_outputs, {ref.id for ref in self.output_refs})
        self.assertEqual(qbb._available_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._actual_output_refs, set())

    def test_getput(self) -> None:
        """Test get and put methods."""
        qbb = self._make_qbb(self.make_quantum())

        # All stored inputs must be readable; missing ones must raise.
        for ref in self.input_refs:
            self.assertEqual(qbb.get(ref), {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            self.assertEqual(qbb.get(ref), {"data": -1})
        for ref in self.missing_refs:
            with self.assertRaises(FileNotFoundError):
                qbb.get(ref)

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs})

        # Write all expected outputs, then read them back.
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        for ref in self.output_refs:
            self.assertEqual(qbb.get(ref), {"data": cast(int, ref.dataId["detector"]) ** 2})

        self.assertEqual(qbb._actual_output_refs, set(self.output_refs))

    def test_getDeferred(self) -> None:
        """Test getDeferred method."""
        qbb = self._make_qbb(self.make_quantum())

        # Fetch a subset of the inputs through deferred handles.
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            handle = qbb.getDeferred(ref)
            self.assertEqual(handle.get(), {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            handle = qbb.getDeferred(ref)
            self.assertEqual(handle.get(), {"data": -1})
        for ref in self.missing_refs:
            handle = qbb.getDeferred(ref)
            with self.assertRaises(FileNotFoundError):
                handle.get()

        # Only the datasets actually fetched count as available/actual.
        used = {ref.id for ref in input_refs + self.init_inputs_refs}
        self.assertEqual(qbb._available_inputs, used)
        self.assertEqual(qbb._actual_inputs, used)
        self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs})

    def test_datasetExistsDirect(self) -> None:
        """Test datasetExistsDirect method."""
        qbb = self._make_qbb(self.make_quantum())

        input_refs = self.input_refs[:2]
        for ref in input_refs:
            self.assertTrue(qbb.datasetExistsDirect(ref))
        for ref in self.init_inputs_refs:
            self.assertTrue(qbb.datasetExistsDirect(ref))
        for ref in self.missing_refs:
            self.assertFalse(qbb.datasetExistsDirect(ref))

        # Existence checks mark datasets as available but not as actually
        # used inputs.
        self.assertEqual(qbb._available_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._actual_inputs, set())
        # NOTE(review): missing refs are not recorded as unavailable here,
        # which is not consistent with get() — confirm intended.
        self.assertEqual(qbb._unavailable_inputs, set())

    def test_markInputUnused(self) -> None:
        """Test markInputUnused method."""
        qbb = self._make_qbb(self.make_quantum())

        # Read every input so everything is marked as actually used.
        for ref in self.input_refs:
            self.assertEqual(qbb.get(ref), {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            self.assertEqual(qbb.get(ref), {"data": -1})

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)

        # Unmarking one input removes it from the actual-input set only.
        qbb.markInputUnused(self.input_refs[0])
        self.assertEqual(
            qbb._actual_inputs, {ref.id for ref in self.input_refs[1:] + self.init_inputs_refs}
        )

    def test_pruneDatasets(self) -> None:
        """Test pruneDatasets method."""
        qbb = self._make_qbb(self.make_quantum())

        # Write all expected outputs and confirm they read back.
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        for ref in self.output_refs:
            self.assertEqual(qbb.get(ref), {"data": cast(int, ref.dataId["detector"]) ** 2})

        # Invalid option combinations must be rejected.
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without disassociate=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=False, unstore=True, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without unstore=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=False, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass disassociate=True without purge=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=True, purge=False)

        # Unstore only (no disassociate, no purge).
        ref = self.output_refs[0]
        qbb.pruneDatasets([ref], disassociate=False, unstore=True, purge=False)
        self.assertFalse(qbb.datasetExistsDirect(ref))
        with self.assertRaises(FileNotFoundError):
            qbb.get(ref)

        # The same dataset can be stored again afterwards.
        qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.datasetExistsDirect(ref))

        # Purge completely; re-storing must also work after a purge.
        ref = self.output_refs[1]
        qbb.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)
        self.assertFalse(qbb.datasetExistsDirect(ref))
        with self.assertRaises(FileNotFoundError):
            qbb.get(ref)
        qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.datasetExistsDirect(ref))

    def test_extract_provenance_data(self) -> None:
        """Test extract_provenance_data method."""
        qbb = self._make_qbb(self.make_quantum())

        # Read every input and store every output.
        for ref in self.input_refs:
            qbb.get(ref)
        for ref in self.init_inputs_refs:
            qbb.get(ref)
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Provenance must round-trip through its JSON representation.
        provenance1 = qbb.extract_provenance_data()
        provenance2 = QuantumProvenanceData.direct(**json.loads(provenance1.json()))

        input_ids = {ref.id for ref in self.input_refs + self.init_inputs_refs}
        output_ids = {ref.id for ref in self.output_refs}
        datastore_name = "FileDatastore@<butlerRoot>/datastore"
        class_name = "lsst.daf.butler.core.storedFileInfo.StoredFileInfo"
        table_name = "file_datastore_records"
        for provenance in (provenance1, provenance2):
            self.assertEqual(provenance.predicted_inputs, input_ids)
            self.assertEqual(provenance.available_inputs, input_ids)
            self.assertEqual(provenance.actual_inputs, input_ids)
            self.assertEqual(provenance.predicted_outputs, output_ids)
            self.assertEqual(provenance.actual_outputs, output_ids)
            self.assertEqual(set(provenance.datastore_records.keys()), {datastore_name})
            datastore_records = provenance.datastore_records[datastore_name]
            self.assertEqual(set(datastore_records.dataset_ids), output_ids)
            self.assertEqual(set(datastore_records.records.keys()), {class_name})
            self.assertEqual(set(datastore_records.records[class_name].keys()), {table_name})
            self.assertEqual(
                {record["dataset_id"] for record in datastore_records.records[class_name][table_name]},
                output_ids,
            )

    def test_collect_and_transfer(self) -> None:
        """Test collect_and_transfer method."""
        quantum1 = self.make_quantum(1)
        qbb1 = self._make_qbb(quantum1)
        quantum2 = self.make_quantum(2)
        qbb2 = self._make_qbb(quantum2)

        # Run the first "task": read all inputs, write all outputs.
        for ref in self.input_refs:
            qbb1.get(ref)
        for ref in self.init_inputs_refs:
            qbb1.get(ref)
        for ref in self.output_refs:
            qbb1.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Run the second "task" on the first one's outputs.
        for ref in self.output_refs:
            qbb2.get(ref)
        for ref in self.output_refs2:
            qbb2.put({"data": cast(int, ref.dataId["detector"]) ** 3}, ref)

        QuantumProvenanceData.collect_and_transfer(
            self.butler,
            [quantum1, quantum2],
            [qbb1.extract_provenance_data(), qbb2.extract_provenance_data()],
        )

        # Everything written through the QBBs is now visible to the butler.
        for ref in self.output_refs:
            self.assertEqual(self.butler.get(ref), {"data": cast(int, ref.dataId["detector"]) ** 2})
        for ref in self.output_refs2:
            self.assertEqual(self.butler.get(ref), {"data": cast(int, ref.dataId["detector"]) ** 3})

395 

if __name__ == "__main__":
    # Allow running this test module directly.
    unittest.main()