Coverage for tests/test_quantumBackedButler.py: 7%

237 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-18 09:55 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28import json 

29import os 

30import unittest 

31import unittest.mock 

32from typing import cast 

33 

34from lsst.daf.butler import ( 

35 Butler, 

36 Config, 

37 DatasetRef, 

38 DatasetType, 

39 DimensionUniverse, 

40 Quantum, 

41 QuantumBackedButler, 

42 QuantumProvenanceData, 

43 RegistryConfig, 

44 StorageClass, 

45) 

46from lsst.daf.butler.direct_butler import DirectButler 

47from lsst.daf.butler.registry import _RegistryFactory 

48from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

49from lsst.resources import ResourcePath 

50 

51TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

52 

53 

class QuantumBackedButlerTestCase(unittest.TestCase):
    """Test case for QuantumBackedButler."""

    def setUp(self) -> None:
        """Create a temporary repo, register dataset types, store inputs."""
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config()
        self.config["root"] = self.root
        self.universe = DimensionUniverse()

        # Make a butler and import dimension definitions.
        registry_config = RegistryConfig(self.config.get("registry"))
        _RegistryFactory(registry_config).create_from_config(butlerRoot=self.root)
        butler = Butler.from_config(self.config, writeable=True, run="RUN")
        assert isinstance(butler, DirectButler)
        self.butler = butler
        self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Register every dataset type used by the tests.
        dimensions = self.universe.conform(("instrument", "detector"))
        storage_class = StorageClass("StructuredDataDict")
        self.datasetTypeInit = DatasetType("test_ds_init", dimensions, storage_class)
        self.datasetTypeInput = DatasetType("test_ds_input", dimensions, storage_class)
        self.datasetTypeOutput = DatasetType("test_ds_output", dimensions, storage_class)
        self.datasetTypeOutput2 = DatasetType("test_ds_output2", dimensions, storage_class)
        self.datasetTypeExtra = DatasetType("test_ds_extra", dimensions, storage_class)

        self.dataset_types: dict[str, DatasetType] = {}
        for dataset_type in (
            self.datasetTypeInit,
            self.datasetTypeInput,
            self.datasetTypeOutput,
            self.datasetTypeOutput2,
            self.datasetTypeExtra,
        ):
            self.butler.registry.registerDatasetType(dataset_type)
            self.dataset_types[dataset_type.name] = dataset_type

        data_ids = [
            self.butler.registry.expandDataId(dict(instrument="Cam1", detector=detector_id))
            for detector_id in (1, 2, 3, 4)
        ]

        # Store the actual input datasets.
        self.input_refs = [
            self.butler.put({"data": data_id["detector"]}, self.datasetTypeInput, data_id)
            for data_id in data_ids
        ]
        self.init_inputs_refs = [self.butler.put({"data": -1}, self.datasetTypeInit, data_ids[0])]
        self.all_input_refs = self.input_refs + self.init_inputs_refs

        # Generate dataset refs for predicted outputs; nothing is stored yet.
        self.output_refs = [DatasetRef(self.datasetTypeOutput, data_id, run="RUN") for data_id in data_ids]
        self.output_refs2 = [DatasetRef(self.datasetTypeOutput2, data_id, run="RUN") for data_id in data_ids]

        self.missing_refs = [DatasetRef(self.datasetTypeExtra, data_id, run="RUN") for data_id in data_ids]

109 

110 def tearDown(self) -> None: 

111 removeTestTempDir(self.root) 

112 

113 def make_quantum(self, step: int = 1) -> Quantum: 

114 """Make a Quantum which includes datastore records.""" 

115 if step == 1: 

116 datastore_records = self.butler._datastore.export_records(self.all_input_refs) 

117 predictedInputs = {self.datasetTypeInput: self.input_refs} 

118 outputs = {self.datasetTypeOutput: self.output_refs} 

119 initInputs = {self.datasetTypeInit: self.init_inputs_refs[0]} 

120 elif step == 2: 

121 # The result should be empty, this is just to test that it works. 

122 datastore_records = self.butler._datastore.export_records(self.output_refs) 

123 predictedInputs = {self.datasetTypeInput: self.output_refs} 

124 outputs = {self.datasetTypeOutput2: self.output_refs2} 

125 initInputs = {} 

126 else: 

127 raise ValueError(f"unexpected {step} value") 

128 

129 return Quantum( 

130 taskName="some.task.name", 

131 inputs=predictedInputs, 

132 outputs=outputs, 

133 initInputs=initInputs, 

134 datastore_records=datastore_records, 

135 ) 

136 

137 def test_initialize(self) -> None: 

138 """Test for initialize factory method""" 

139 quantum = self.make_quantum() 

140 qbb = QuantumBackedButler.initialize( 

141 config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types 

142 ) 

143 self._test_factory(qbb) 

144 

145 def test_initialize_repo_index(self) -> None: 

146 """Test for initialize using config file and repo index.""" 

147 # Store config to a file. 

148 self.config.dumpToUri(self.root) 

149 

150 butler_index = Config() 

151 butler_index["label"] = self.root 

152 with ResourcePath.temporary_uri(suffix=".yaml") as index_path: 

153 butler_index.dumpToUri(index_path) 

154 

155 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(index_path)}): 

156 quantum = self.make_quantum() 

157 qbb = QuantumBackedButler.initialize( 

158 config="label", 

159 quantum=quantum, 

160 dimensions=self.universe, 

161 dataset_types=self.dataset_types, 

162 ) 

163 self._test_factory(qbb) 

164 

165 def test_from_predicted(self) -> None: 

166 """Test for from_predicted factory method""" 

167 datastore_records = self.butler._datastore.export_records(self.all_input_refs) 

168 qbb = QuantumBackedButler.from_predicted( 

169 config=self.config, 

170 predicted_inputs=[ref.id for ref in self.all_input_refs], 

171 predicted_outputs=[ref.id for ref in self.output_refs], 

172 dimensions=self.universe, 

173 datastore_records=datastore_records, 

174 dataset_types=self.dataset_types, 

175 ) 

176 self._test_factory(qbb) 

177 

178 def _test_factory(self, qbb: QuantumBackedButler) -> None: 

179 """Test state immediately after construction.""" 

180 self.assertTrue(qbb.isWriteable()) 

181 self.assertEqual(qbb._predicted_inputs, {ref.id for ref in self.all_input_refs}) 

182 self.assertEqual(qbb._predicted_outputs, {ref.id for ref in self.output_refs}) 

183 self.assertEqual(qbb._available_inputs, set()) 

184 self.assertEqual(qbb._unavailable_inputs, set()) 

185 self.assertEqual(qbb._actual_inputs, set()) 

186 self.assertEqual(qbb._actual_output_refs, set()) 

187 

188 def test_getput(self) -> None: 

189 """Test for get/put methods""" 

190 quantum = self.make_quantum() 

191 qbb = QuantumBackedButler.initialize( 

192 config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types 

193 ) 

194 

195 # Verify all input data are readable. 

196 for ref in self.input_refs: 

197 data = qbb.get(ref) 

198 self.assertEqual(data, {"data": ref.dataId["detector"]}) 

199 for ref in self.init_inputs_refs: 

200 data = qbb.get(ref) 

201 self.assertEqual(data, {"data": -1}) 

202 for ref in self.missing_refs: 

203 with self.assertRaises(FileNotFoundError): 

204 data = qbb.get(ref) 

205 

206 self.assertEqual(qbb._available_inputs, qbb._predicted_inputs) 

207 self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs) 

208 self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs}) 

209 

210 # Write all expected outputs. 

211 for ref in self.output_refs: 

212 qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref) 

213 

214 # Must be able to read them back 

215 for ref in self.output_refs: 

216 data = qbb.get(ref) 

217 self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2}) 

218 

219 self.assertEqual(qbb._actual_output_refs, set(self.output_refs)) 

220 

221 def test_getDeferred(self) -> None: 

222 """Test for getDeferred method""" 

223 quantum = self.make_quantum() 

224 qbb = QuantumBackedButler.initialize( 

225 config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types 

226 ) 

227 

228 # get some input data 

229 input_refs = self.input_refs[:2] 

230 for ref in input_refs: 

231 data = qbb.getDeferred(ref) 

232 self.assertEqual(data.get(), {"data": ref.dataId["detector"]}) 

233 for ref in self.init_inputs_refs: 

234 data = qbb.getDeferred(ref) 

235 self.assertEqual(data.get(), {"data": -1}) 

236 for ref in self.missing_refs: 

237 data = qbb.getDeferred(ref) 

238 with self.assertRaises(FileNotFoundError): 

239 data.get() 

240 

241 # _avalable_inputs is not 

242 self.assertEqual(qbb._available_inputs, {ref.id for ref in input_refs + self.init_inputs_refs}) 

243 self.assertEqual(qbb._actual_inputs, {ref.id for ref in input_refs + self.init_inputs_refs}) 

244 self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs}) 

245 

246 def test_stored(self) -> None: 

247 """Test for dataset existence method""" 

248 quantum = self.make_quantum() 

249 qbb = QuantumBackedButler.initialize( 

250 config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types 

251 ) 

252 

253 # get some input data 

254 input_refs = self.input_refs[:2] 

255 for ref in input_refs: 

256 exists = qbb.stored(ref) 

257 self.assertTrue(exists) 

258 for ref in self.init_inputs_refs: 

259 exists = qbb.stored(ref) 

260 self.assertTrue(exists) 

261 for ref in self.missing_refs: 

262 exists = qbb.stored(ref) 

263 self.assertFalse(exists) 

264 

265 # Now do the same checks in bulk. 

266 missing_set = set(self.missing_refs) 

267 refs = input_refs + self.init_inputs_refs + self.missing_refs 

268 stored_many = qbb.stored_many(refs) 

269 for ref, stored in stored_many.items(): 

270 if ref in missing_set: 

271 self.assertFalse(stored) 

272 else: 

273 self.assertTrue(stored) 

274 

275 # _available_inputs is not 

276 self.assertEqual(qbb._available_inputs, {ref.id for ref in input_refs + self.init_inputs_refs}) 

277 self.assertEqual(qbb._actual_inputs, set()) 

278 self.assertEqual(qbb._unavailable_inputs, set()) # this is not consistent with get? 

279 

280 def test_markInputUnused(self) -> None: 

281 """Test for markInputUnused method""" 

282 quantum = self.make_quantum() 

283 qbb = QuantumBackedButler.initialize( 

284 config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types 

285 ) 

286 

287 # get some input data 

288 for ref in self.input_refs: 

289 data = qbb.get(ref) 

290 self.assertEqual(data, {"data": ref.dataId["detector"]}) 

291 for ref in self.init_inputs_refs: 

292 data = qbb.get(ref) 

293 self.assertEqual(data, {"data": -1}) 

294 

295 self.assertEqual(qbb._available_inputs, qbb._predicted_inputs) 

296 self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs) 

297 

298 qbb.markInputUnused(self.input_refs[0]) 

299 self.assertEqual(qbb._actual_inputs, {ref.id for ref in self.input_refs[1:] + self.init_inputs_refs}) 

300 

301 def test_pruneDatasets(self) -> None: 

302 """Test for pruneDatasets methods""" 

303 quantum = self.make_quantum() 

304 qbb = QuantumBackedButler.initialize( 

305 config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types 

306 ) 

307 

308 # Write all expected outputs. 

309 for ref in self.output_refs: 

310 qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref) 

311 

312 # Must be able to read them back 

313 for ref in self.output_refs: 

314 data = qbb.get(ref) 

315 self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2}) 

316 

317 # Check for invalid arguments. 

318 with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without disassociate=True"): 

319 qbb.pruneDatasets(self.output_refs, disassociate=False, unstore=True, purge=True) 

320 with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without unstore=True"): 

321 qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=False, purge=True) 

322 with self.assertRaisesRegex(TypeError, "Cannot pass disassociate=True without purge=True"): 

323 qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=True, purge=False) 

324 

325 # Disassociate only. 

326 ref = self.output_refs[0] 

327 qbb.pruneDatasets([ref], disassociate=False, unstore=True, purge=False) 

328 self.assertFalse(qbb.stored(ref)) 

329 with self.assertRaises(FileNotFoundError): 

330 data = qbb.get(ref) 

331 

332 # can store it again 

333 qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref) 

334 self.assertTrue(qbb.stored(ref)) 

335 

336 # Purge completely. 

337 ref = self.output_refs[1] 

338 qbb.pruneDatasets([ref], disassociate=True, unstore=True, purge=True) 

339 self.assertFalse(qbb.stored(ref)) 

340 with self.assertRaises(FileNotFoundError): 

341 data = qbb.get(ref) 

342 qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref) 

343 self.assertTrue(qbb.stored(ref)) 

344 

345 def test_extract_provenance_data(self) -> None: 

346 """Test for extract_provenance_data method""" 

347 quantum = self.make_quantum() 

348 qbb = QuantumBackedButler.initialize( 

349 config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types 

350 ) 

351 

352 # read/store everything 

353 for ref in self.input_refs: 

354 qbb.get(ref) 

355 for ref in self.init_inputs_refs: 

356 qbb.get(ref) 

357 for ref in self.output_refs: 

358 qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref) 

359 

360 provenance1 = qbb.extract_provenance_data() 

361 prov_json = provenance1.model_dump_json() 

362 provenance2 = QuantumProvenanceData.direct(**json.loads(prov_json)) 

363 for provenance in (provenance1, provenance2): 

364 input_ids = {ref.id for ref in self.input_refs + self.init_inputs_refs} 

365 self.assertEqual(provenance.predicted_inputs, input_ids) 

366 self.assertEqual(provenance.available_inputs, input_ids) 

367 self.assertEqual(provenance.actual_inputs, input_ids) 

368 output_ids = {ref.id for ref in self.output_refs} 

369 self.assertEqual(provenance.predicted_outputs, output_ids) 

370 self.assertEqual(provenance.actual_outputs, output_ids) 

371 datastore_name = "FileDatastore@<butlerRoot>/datastore" 

372 self.assertEqual(set(provenance.datastore_records.keys()), {datastore_name}) 

373 datastore_records = provenance.datastore_records[datastore_name] 

374 self.assertEqual(set(datastore_records.dataset_ids), output_ids) 

375 class_name = "lsst.daf.butler.datastore.stored_file_info.StoredFileInfo" 

376 self.assertEqual(set(datastore_records.records.keys()), {class_name}) 

377 self.assertEqual(set(datastore_records.records[class_name].keys()), {id.hex for id in output_ids}) 

378 table_name = "file_datastore_records" 

379 for dataset_data in datastore_records.records[class_name].values(): 

380 self.assertEqual(set(dataset_data), {table_name}) 

381 

382 def test_collect_and_transfer(self) -> None: 

383 """Test for collect_and_transfer method""" 

384 quantum1 = self.make_quantum(1) 

385 qbb1 = QuantumBackedButler.initialize( 

386 config=self.config, quantum=quantum1, dimensions=self.universe, dataset_types=self.dataset_types 

387 ) 

388 

389 quantum2 = self.make_quantum(2) 

390 qbb2 = QuantumBackedButler.initialize( 

391 config=self.config, quantum=quantum2, dimensions=self.universe, dataset_types=self.dataset_types 

392 ) 

393 

394 # read/store everything 

395 for ref in self.input_refs: 

396 qbb1.get(ref) 

397 for ref in self.init_inputs_refs: 

398 qbb1.get(ref) 

399 for ref in self.output_refs: 

400 qbb1.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref) 

401 

402 for ref in self.output_refs: 

403 qbb2.get(ref) 

404 for ref in self.output_refs2: 

405 qbb2.put({"data": cast(int, ref.dataId["detector"]) ** 3}, ref) 

406 

407 QuantumProvenanceData.collect_and_transfer( 

408 self.butler, 

409 [quantum1, quantum2], 

410 [qbb1.extract_provenance_data(), qbb2.extract_provenance_data()], 

411 ) 

412 

413 for ref in self.output_refs: 

414 data = self.butler.get(ref) 

415 self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2}) 

416 

417 for ref in self.output_refs2: 

418 data = self.butler.get(ref) 

419 self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 3}) 

420 

421 

# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()