Coverage for tests/test_quantumBackedButler.py: 7%
216 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-23 02:06 -0700
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
import json
import os
import unittest
import uuid
from typing import cast

from lsst.daf.butler import (
    Butler,
    Config,
    DatasetRef,
    DatasetType,
    DimensionUniverse,
    Quantum,
    QuantumBackedButler,
    QuantumProvenanceData,
    Registry,
    RegistryConfig,
    StorageClass,
)
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir

# Directory containing this test module; used to locate test data files.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
class QuantumBackedButlerTestCase(unittest.TestCase):
    """Test case for QuantumBackedButler.

    Builds a small repository with input datasets for four detectors plus
    one "init" dataset, then exercises the QuantumBackedButler factory
    methods, dataset I/O, pruning, and provenance extraction/transfer.
    """

    def setUp(self) -> None:
        """Create a temporary butler repo with registered dataset types,
        stored input datasets, and predicted (not yet stored) output refs.
        """
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config()
        self.config["root"] = self.root
        self.universe = DimensionUniverse()

        # Make a butler and import dimension definitions.
        registryConfig = RegistryConfig(self.config.get("registry"))
        Registry.createFromConfig(registryConfig, butlerRoot=self.root)
        self.butler = Butler(self.config, writeable=True, run="RUN")
        self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Make all dataset types used by the tests; they share the same
        # (instrument, detector) dimensions and storage class.
        graph = self.universe.extract(("instrument", "detector"))
        storageClass = StorageClass("StructuredDataDict")
        self.datasetTypeInit = DatasetType("test_ds_init", graph, storageClass)
        self.datasetTypeInput = DatasetType("test_ds_input", graph, storageClass)
        self.datasetTypeOutput = DatasetType("test_ds_output", graph, storageClass)
        self.datasetTypeOutput2 = DatasetType("test_ds_output2", graph, storageClass)
        self.datasetTypeExtra = DatasetType("test_ds_extra", graph, storageClass)

        # Register every dataset type and keep a name -> type mapping for
        # passing to the QuantumBackedButler factories.
        self.dataset_types: dict[str, DatasetType] = {}
        dataset_types = (
            self.datasetTypeInit,
            self.datasetTypeInput,
            self.datasetTypeOutput,
            self.datasetTypeOutput2,
            self.datasetTypeExtra,
        )
        for dataset_type in dataset_types:
            self.butler.registry.registerDatasetType(dataset_type)
            self.dataset_types[dataset_type.name] = dataset_type

        dataIds = [
            self.butler.registry.expandDataId(dict(instrument="Cam1", detector=detector_id))
            for detector_id in (1, 2, 3, 4)
        ]

        # Make actual input datasets; each payload records its detector id.
        self.input_refs = [
            self.butler.put({"data": dataId["detector"]}, self.datasetTypeInput, dataId) for dataId in dataIds
        ]
        self.init_inputs_refs = [self.butler.put({"data": -1}, self.datasetTypeInit, dataIds[0])]
        self.all_input_refs = self.input_refs + self.init_inputs_refs

        # Generate dataset refs for outputs; these are predicted only and
        # not stored by setUp.
        self.output_refs = [
            DatasetRef(self.datasetTypeOutput, dataId, id=uuid.uuid4(), run="RUN") for dataId in dataIds
        ]
        self.output_refs2 = [
            DatasetRef(self.datasetTypeOutput2, dataId, id=uuid.uuid4(), run="RUN") for dataId in dataIds
        ]

        # Refs that are never stored anywhere; used to exercise the
        # missing-dataset code paths.
        self.missing_refs = [
            DatasetRef(self.datasetTypeExtra, dataId, id=uuid.uuid4(), run="RUN") for dataId in dataIds
        ]

    def tearDown(self) -> None:
        """Remove the temporary repository."""
        removeTestTempDir(self.root)

    def make_quantum(self, step: int = 1) -> Quantum:
        """Make a Quantum which includes datastore records.

        Parameters
        ----------
        step : `int`
            Which pipeline step to model: 1 consumes the stored inputs and
            predicts ``output_refs``; 2 consumes ``output_refs`` (not yet
            stored, so its exported records are empty) and predicts
            ``output_refs2``.

        Raises
        ------
        ValueError
            Raised if ``step`` is not 1 or 2.
        """
        if step == 1:
            datastore_records = self.butler.datastore.export_records(self.all_input_refs)
            predictedInputs = {self.datasetTypeInput: self.input_refs}
            outputs = {self.datasetTypeOutput: self.output_refs}
            initInputs = {self.datasetTypeInit: self.init_inputs_refs[0]}
        elif step == 2:
            # The result should be empty, this is just to test that it works.
            datastore_records = self.butler.datastore.export_records(self.output_refs)
            predictedInputs = {self.datasetTypeInput: self.output_refs}
            outputs = {self.datasetTypeOutput2: self.output_refs2}
            initInputs = {}
        else:
            raise ValueError(f"unexpected {step} value")

        return Quantum(
            taskName="some.task.name",
            inputs=predictedInputs,
            outputs=outputs,
            initInputs=initInputs,
            datastore_records=datastore_records,
        )

    def test_initialize(self) -> None:
        """Test for initialize factory method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )
        self._test_factory(qbb)

    def test_from_predicted(self) -> None:
        """Test for from_predicted factory method"""
        datastore_records = self.butler.datastore.export_records(self.all_input_refs)
        qbb = QuantumBackedButler.from_predicted(
            config=self.config,
            predicted_inputs=[ref.getCheckedId() for ref in self.all_input_refs],
            predicted_outputs=[ref.getCheckedId() for ref in self.output_refs],
            dimensions=self.universe,
            datastore_records=datastore_records,
            dataset_types=self.dataset_types,
        )
        self._test_factory(qbb)

    def _test_factory(self, qbb: QuantumBackedButler) -> None:
        """Test state immediately after construction: predicted sets are
        populated, all bookkeeping sets start empty.
        """
        self.assertTrue(qbb.isWriteable())
        self.assertEqual(qbb._predicted_inputs, set(ref.id for ref in self.all_input_refs))
        self.assertEqual(qbb._predicted_outputs, set(ref.id for ref in self.output_refs))
        self.assertEqual(qbb._available_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._actual_output_refs, set())

    def test_getPutDirect(self) -> None:
        """Test for getDirect/putDirect methods"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Verify all input data are readable.
        for ref in self.input_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": -1})
        for ref in self.missing_refs:
            with self.assertRaises(FileNotFoundError):
                data = qbb.getDirect(ref)

        # getDirect updates the bookkeeping: successful reads land in both
        # available and actual inputs, failed reads in unavailable.
        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._unavailable_inputs, set(ref.id for ref in self.missing_refs))

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back
        for ref in self.output_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        self.assertEqual(qbb._actual_output_refs, set(self.output_refs))

    def test_getDirectDeferred(self) -> None:
        """Test for getDirectDeferred method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # get some input data
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            data = qbb.getDirectDeferred(ref)
            self.assertEqual(data.get(), {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDirectDeferred(ref)
            self.assertEqual(data.get(), {"data": -1})
        for ref in self.missing_refs:
            # The deferred handle is created eagerly; the failure only
            # surfaces when .get() is called.
            data = qbb.getDirectDeferred(ref)
            with self.assertRaises(FileNotFoundError):
                data.get()

        # Only the refs actually resolved via .get() are recorded; the
        # un-read input refs stay out of available/actual.
        self.assertEqual(qbb._available_inputs, set(ref.id for ref in input_refs + self.init_inputs_refs))
        self.assertEqual(qbb._actual_inputs, set(ref.id for ref in input_refs + self.init_inputs_refs))
        self.assertEqual(qbb._unavailable_inputs, set(ref.id for ref in self.missing_refs))

    def test_datasetExistsDirect(self) -> None:
        """Test for datasetExistsDirect method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # get some input data
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            exists = qbb.datasetExistsDirect(ref)
            self.assertTrue(exists)
        for ref in self.init_inputs_refs:
            exists = qbb.datasetExistsDirect(ref)
            self.assertTrue(exists)
        for ref in self.missing_refs:
            exists = qbb.datasetExistsDirect(ref)
            self.assertFalse(exists)

        # Existence checks update _available_inputs but, unlike getDirect,
        # record nothing as actually used or unavailable.
        self.assertEqual(qbb._available_inputs, set(ref.id for ref in input_refs + self.init_inputs_refs))
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())  # this is not consistent with getDirect?

    def test_markInputUnused(self) -> None:
        """Test for markInputUnused method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # get some input data
        for ref in self.input_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": -1})

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)

        # Marking an input unused removes it from _actual_inputs only.
        qbb.markInputUnused(self.input_refs[0])
        self.assertEqual(
            qbb._actual_inputs, set(ref.id for ref in self.input_refs[1:] + self.init_inputs_refs)
        )

    def test_pruneDatasets(self) -> None:
        """Test for pruneDatasets methods"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back
        for ref in self.output_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        # Check for invalid arguments.
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without disassociate=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=False, unstore=True, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without unstore=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=False, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass disassociate=True without purge=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=True, purge=False)

        # Unstore only (no disassociate/purge).
        ref = self.output_refs[0]
        qbb.pruneDatasets([ref], disassociate=False, unstore=True, purge=False)
        self.assertFalse(qbb.datasetExistsDirect(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.getDirect(ref)

        # can store it again
        qbb.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.datasetExistsDirect(ref))

        # Purge completely.
        ref = self.output_refs[1]
        qbb.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)
        self.assertFalse(qbb.datasetExistsDirect(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.getDirect(ref)
        qbb.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.datasetExistsDirect(ref))

    def test_extract_provenance_data(self) -> None:
        """Test for extract_provenance_data method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # read/store everything
        for ref in self.input_refs:
            qbb.getDirect(ref)
        for ref in self.init_inputs_refs:
            qbb.getDirect(ref)
        for ref in self.output_refs:
            qbb.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Check provenance both as extracted and after a JSON round-trip.
        provenance1 = qbb.extract_provenance_data()
        prov_json = provenance1.json()
        provenance2 = QuantumProvenanceData.direct(**json.loads(prov_json))
        for provenance in (provenance1, provenance2):
            input_ids = set(ref.id for ref in self.input_refs + self.init_inputs_refs)
            self.assertEqual(provenance.predicted_inputs, input_ids)
            self.assertEqual(provenance.available_inputs, input_ids)
            self.assertEqual(provenance.actual_inputs, input_ids)
            output_ids = set(ref.id for ref in self.output_refs)
            self.assertEqual(provenance.predicted_outputs, output_ids)
            self.assertEqual(provenance.actual_outputs, output_ids)
            # Datastore records should cover exactly the stored outputs.
            datastore_name = "FileDatastore@<butlerRoot>/datastore"
            self.assertEqual(set(provenance.datastore_records.keys()), {datastore_name})
            datastore_records = provenance.datastore_records[datastore_name]
            self.assertEqual(set(datastore_records.dataset_ids), output_ids)
            class_name = "lsst.daf.butler.core.storedFileInfo.StoredFileInfo"
            table_name = "file_datastore_records"
            self.assertEqual(set(datastore_records.records.keys()), {class_name})
            self.assertEqual(set(datastore_records.records[class_name].keys()), {table_name})
            self.assertEqual(
                set(record["dataset_id"] for record in datastore_records.records[class_name][table_name]),
                output_ids,
            )

    def test_collect_and_transfer(self) -> None:
        """Test for collect_and_transfer method"""
        quantum1 = self.make_quantum(1)
        qbb1 = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum1, dimensions=self.universe, dataset_types=self.dataset_types
        )

        quantum2 = self.make_quantum(2)
        qbb2 = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum2, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # read/store everything
        for ref in self.input_refs:
            qbb1.getDirect(ref)
        for ref in self.init_inputs_refs:
            qbb1.getDirect(ref)
        for ref in self.output_refs:
            qbb1.putDirect({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Second quantum consumes the first quantum's outputs.
        for ref in self.output_refs:
            qbb2.getDirect(ref)
        for ref in self.output_refs2:
            qbb2.putDirect({"data": cast(int, ref.dataId["detector"]) ** 3}, ref)

        QuantumProvenanceData.collect_and_transfer(
            self.butler,
            [quantum1, quantum2],
            [qbb1.extract_provenance_data(), qbb2.extract_provenance_data()],
        )

        # All outputs from both quanta must now be readable through the
        # full butler.
        for ref in self.output_refs:
            data = self.butler.getDirect(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        for ref in self.output_refs2:
            data = self.butler.getDirect(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 3})
if __name__ == "__main__":
    unittest.main()