Coverage for tests/test_quantumBackedButler.py: 7%
216 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-07 10:26 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-07 10:26 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22import json
23import os
24import unittest
25import uuid
26from typing import cast
28from lsst.daf.butler import (
29 Butler,
30 Config,
31 DatasetRef,
32 DatasetType,
33 DimensionUniverse,
34 Quantum,
35 QuantumBackedButler,
36 QuantumProvenanceData,
37 Registry,
38 RegistryConfig,
39 StorageClass,
40)
41from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
# Directory containing this test module; used both as the parent of the
# temporary repository root and to locate the "data/registry" fixtures.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
class QuantumBackedButlerTestCase(unittest.TestCase):
    """Test case for QuantumBackedButler."""

    def setUp(self) -> None:
        """Create a temporary repository and populate it with test datasets.

        Registers five dataset types, stores input and init-input datasets
        through a full `Butler`, and prepares unresolved-in-datastore refs for
        the predicted outputs and for datasets that are never written
        (``missing_refs``).
        """
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config()
        self.config["root"] = self.root
        self.universe = DimensionUniverse()

        # Make a butler and import dimension definitions.
        registryConfig = RegistryConfig(self.config.get("registry"))
        Registry.createFromConfig(registryConfig, butlerRoot=self.root)
        self.butler = Butler(self.config, writeable=True, run="RUN")
        self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Make all dataset types.
        graph = self.universe.extract(("instrument", "detector"))
        storageClass = StorageClass("StructuredDataDict")
        self.datasetTypeInit = DatasetType("test_ds_init", graph, storageClass)
        self.datasetTypeInput = DatasetType("test_ds_input", graph, storageClass)
        self.datasetTypeOutput = DatasetType("test_ds_output", graph, storageClass)
        self.datasetTypeOutput2 = DatasetType("test_ds_output2", graph, storageClass)
        self.datasetTypeExtra = DatasetType("test_ds_extra", graph, storageClass)
        for datasetType in (
            self.datasetTypeInit,
            self.datasetTypeInput,
            self.datasetTypeOutput,
            self.datasetTypeOutput2,
            self.datasetTypeExtra,
        ):
            self.butler.registry.registerDatasetType(datasetType)

        dataIds = [
            self.butler.registry.expandDataId(dict(instrument="Cam1", detector=detector_id))
            for detector_id in (1, 2, 3, 4)
        ]

        # Make actual input datasets; each payload records its detector ID.
        self.input_refs = [
            self.butler.put({"data": dataId["detector"]}, self.datasetTypeInput, dataId) for dataId in dataIds
        ]
        self.init_inputs_refs = [self.butler.put({"data": -1}, self.datasetTypeInit, dataIds[0])]
        self.all_input_refs = self.input_refs + self.init_inputs_refs

        # Generate dataset refs for outputs; nothing is stored for them yet.
        self.output_refs = [
            DatasetRef(self.datasetTypeOutput, dataId, id=uuid.uuid4(), run="RUN") for dataId in dataIds
        ]
        self.output_refs2 = [
            DatasetRef(self.datasetTypeOutput2, dataId, id=uuid.uuid4(), run="RUN") for dataId in dataIds
        ]

        # Refs that are never written by any test, used to exercise the
        # "dataset does not exist" code paths.
        self.missing_refs = [
            DatasetRef(self.datasetTypeExtra, dataId, id=uuid.uuid4(), run="RUN") for dataId in dataIds
        ]

    def tearDown(self) -> None:
        """Remove the temporary repository created by `setUp`."""
        removeTestTempDir(self.root)

    def make_quantum(self, step: int = 1) -> Quantum:
        """Make a Quantum which includes datastore records.

        Parameters
        ----------
        step : `int`, optional
            Which quantum to build: ``1`` reads the stored inputs and
            predicts ``output_refs``; ``2`` reads ``output_refs`` and
            predicts ``output_refs2``.

        Returns
        -------
        quantum : `Quantum`
            Quantum with inputs, outputs, init-inputs and datastore records.

        Raises
        ------
        ValueError
            Raised if ``step`` is neither 1 nor 2.
        """
        if step == 1:
            datastore_records = self.butler.datastore.export_records(self.all_input_refs)
            predictedInputs = {self.datasetTypeInput: self.input_refs}
            outputs = {self.datasetTypeOutput: self.output_refs}
            initInputs = {self.datasetTypeInit: self.init_inputs_refs[0]}
        elif step == 2:
            # The result should be empty, this is just to test that it works.
            datastore_records = self.butler.datastore.export_records(self.output_refs)
            # NOTE(review): output_refs are of datasetTypeOutput but are keyed
            # here by datasetTypeInput; kept as-is, confirm this is intended.
            predictedInputs = {self.datasetTypeInput: self.output_refs}
            outputs = {self.datasetTypeOutput2: self.output_refs2}
            initInputs = {}
        else:
            raise ValueError(f"unexpected {step} value")

        return Quantum(
            taskName="some.task.name",
            inputs=predictedInputs,
            outputs=outputs,
            initInputs=initInputs,
            datastore_records=datastore_records,
        )

    def test_initialize(self) -> None:
        """Test for initialize factory method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)
        self._test_factory(qbb)

    def test_from_predicted(self) -> None:
        """Test for from_predicted factory method"""
        datastore_records = self.butler.datastore.export_records(self.all_input_refs)
        qbb = QuantumBackedButler.from_predicted(
            config=self.config,
            predicted_inputs=[ref.getCheckedId() for ref in self.all_input_refs],
            predicted_outputs=[ref.getCheckedId() for ref in self.output_refs],
            dimensions=self.universe,
            datastore_records=datastore_records,
        )
        self._test_factory(qbb)

    def _test_factory(self, qbb: QuantumBackedButler) -> None:
        """Test state immediately after construction.

        Parameters
        ----------
        qbb : `QuantumBackedButler`
            Freshly constructed butler; its predicted sets must match the
            fixture refs and all of its tracking sets must still be empty.
        """
        self.assertTrue(qbb.isWriteable())
        self.assertEqual(qbb._predicted_inputs, {ref.id for ref in self.all_input_refs})
        self.assertEqual(qbb._predicted_outputs, {ref.id for ref in self.output_refs})
        self.assertEqual(qbb._available_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._actual_output_refs, set())

    def test_getPutDirect(self) -> None:
        """Test for getDirect/putDirect methods"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # Verify all input data are readable.
        for ref in self.input_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": -1})
        # Datasets that were never stored must fail to load.
        for ref in self.missing_refs:
            with self.assertRaises(FileNotFoundError):
                qbb.getDirect(ref)

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs})

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back.
        for ref in self.output_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        self.assertEqual(qbb._actual_output_refs, set(self.output_refs))

    def test_getDirectDeferred(self) -> None:
        """Test for getDirectDeferred method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # Read only a subset of the predicted inputs.
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            data = qbb.getDirectDeferred(ref)
            self.assertEqual(data.get(), {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDirectDeferred(ref)
            self.assertEqual(data.get(), {"data": -1})
        # Creating the deferred handle succeeds for a missing dataset; the
        # failure only surfaces when the data is actually requested.
        for ref in self.missing_refs:
            data = qbb.getDirectDeferred(ref)
            with self.assertRaises(FileNotFoundError):
                data.get()

        # Only the datasets that were actually requested are tracked, so
        # _available_inputs is not the full set of predicted inputs.
        requested_ids = {ref.id for ref in input_refs + self.init_inputs_refs}
        self.assertEqual(qbb._available_inputs, requested_ids)
        self.assertEqual(qbb._actual_inputs, requested_ids)
        self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs})

    def test_datasetExistsDirect(self) -> None:
        """Test for datasetExistsDirect method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # Check existence of a subset of the predicted inputs.
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            self.assertTrue(qbb.datasetExistsDirect(ref))
        for ref in self.init_inputs_refs:
            self.assertTrue(qbb.datasetExistsDirect(ref))
        for ref in self.missing_refs:
            self.assertFalse(qbb.datasetExistsDirect(ref))

        # Only the checked datasets are tracked, so _available_inputs is not
        # the full set of predicted inputs.  An existence check does not count
        # as an actual read.
        self.assertEqual(qbb._available_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())  # this is not consistent with getDirect?

    def test_markInputUnused(self) -> None:
        """Test for markInputUnused method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # Read all input data so that every predicted input becomes actual.
        for ref in self.input_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": -1})

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)

        # Marking one input unused must drop exactly that one from the
        # actual inputs.
        qbb.markInputUnused(self.input_refs[0])
        self.assertEqual(qbb._actual_inputs, {ref.id for ref in self.input_refs[1:] + self.init_inputs_refs})

    def test_pruneDatasets(self) -> None:
        """Test for pruneDatasets methods"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back.
        for ref in self.output_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        # Check for invalid arguments.
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without disassociate=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=False, unstore=True, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without unstore=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=False, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass disassociate=True without purge=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=True, purge=False)

        # Unstore only.
        ref = self.output_refs[0]
        qbb.pruneDatasets([ref], disassociate=False, unstore=True, purge=False)
        self.assertFalse(qbb.datasetExistsDirect(ref))
        with self.assertRaises(FileNotFoundError):
            qbb.getDirect(ref)

        # Can store it again.
        qbb.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.datasetExistsDirect(ref))

        # Purge completely.
        ref = self.output_refs[1]
        qbb.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)
        self.assertFalse(qbb.datasetExistsDirect(ref))
        with self.assertRaises(FileNotFoundError):
            qbb.getDirect(ref)

        # A purged dataset can also be stored again.
        qbb.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.datasetExistsDirect(ref))

    def test_extract_provenance_data(self) -> None:
        """Test for extract_provenance_data method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # Read/store everything.
        for ref in self.input_refs:
            qbb.getDirect(ref)
        for ref in self.init_inputs_refs:
            qbb.getDirect(ref)
        for ref in self.output_refs:
            qbb.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Check both the extracted provenance and a copy that went through a
        # JSON round trip.
        provenance1 = qbb.extract_provenance_data()
        prov_json = provenance1.json()
        provenance2 = QuantumProvenanceData.direct(**json.loads(prov_json))

        # Expected values do not depend on which provenance object is checked.
        input_ids = {ref.id for ref in self.input_refs + self.init_inputs_refs}
        output_ids = {ref.id for ref in self.output_refs}
        datastore_name = "FileDatastore@<butlerRoot>/datastore"
        class_name = "lsst.daf.butler.core.storedFileInfo.StoredFileInfo"
        table_name = "file_datastore_records"

        for provenance in (provenance1, provenance2):
            self.assertEqual(provenance.predicted_inputs, input_ids)
            self.assertEqual(provenance.available_inputs, input_ids)
            self.assertEqual(provenance.actual_inputs, input_ids)
            self.assertEqual(provenance.predicted_outputs, output_ids)
            self.assertEqual(provenance.actual_outputs, output_ids)
            self.assertEqual(set(provenance.datastore_records.keys()), {datastore_name})
            datastore_records = provenance.datastore_records[datastore_name]
            self.assertEqual(set(datastore_records.dataset_ids), output_ids)
            self.assertEqual(set(datastore_records.records.keys()), {class_name})
            self.assertEqual(set(datastore_records.records[class_name].keys()), {table_name})
            self.assertEqual(
                {record["dataset_id"] for record in datastore_records.records[class_name][table_name]},
                output_ids,
            )

    def test_collect_and_transfer(self) -> None:
        """Test for collect_and_transfer method"""
        quantum1 = self.make_quantum(1)
        qbb1 = QuantumBackedButler.initialize(config=self.config, quantum=quantum1, dimensions=self.universe)

        quantum2 = self.make_quantum(2)
        qbb2 = QuantumBackedButler.initialize(config=self.config, quantum=quantum2, dimensions=self.universe)

        # Read/store everything for the first quantum.
        for ref in self.input_refs:
            qbb1.getDirect(ref)
        for ref in self.init_inputs_refs:
            qbb1.getDirect(ref)
        for ref in self.output_refs:
            qbb1.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # The second quantum consumes the outputs of the first one.
        for ref in self.output_refs:
            qbb2.getDirect(ref)
        for ref in self.output_refs2:
            qbb2.putDirect({"data": cast(int, ref.dataId["detector"]) ** 3}, ref)

        QuantumProvenanceData.collect_and_transfer(
            self.butler,
            [quantum1, quantum2],
            [qbb1.extract_provenance_data(), qbb2.extract_provenance_data()],
        )

        # After the transfer the full butler must be able to read the outputs
        # of both quanta.
        for ref in self.output_refs:
            data = self.butler.getDirect(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        for ref in self.output_refs2:
            data = self.butler.getDirect(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 3})
# Run the test suite when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()