Coverage for tests/test_quantumBackedButler.py: 7% (237 statements)
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import json
import os
import unittest
import unittest.mock
from typing import cast

from lsst.daf.butler import (
    Butler,
    Config,
    DatasetRef,
    DatasetType,
    DimensionUniverse,
    Quantum,
    QuantumBackedButler,
    QuantumProvenanceData,
    RegistryConfig,
    StorageClass,
)
from lsst.daf.butler.direct_butler import DirectButler
from lsst.daf.butler.registry import _RegistryFactory
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
from lsst.resources import ResourcePath

TESTDIR = os.path.abspath(os.path.dirname(__file__))


class QuantumBackedButlerTestCase(unittest.TestCase):
    """Test case for QuantumBackedButler."""

    def setUp(self) -> None:
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config()
        self.config["root"] = self.root
        self.universe = DimensionUniverse()

        # Make a butler and import dimension definitions.
        registryConfig = RegistryConfig(self.config.get("registry"))
        _RegistryFactory(registryConfig).create_from_config(butlerRoot=self.root)
        butler = Butler.from_config(self.config, writeable=True, run="RUN")
        assert isinstance(butler, DirectButler)
        self.butler = butler
        self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # make all dataset types
        graph = self.universe.conform(("instrument", "detector"))
        storageClass = StorageClass("StructuredDataDict")
        self.datasetTypeInit = DatasetType("test_ds_init", graph, storageClass)
        self.datasetTypeInput = DatasetType("test_ds_input", graph, storageClass)
        self.datasetTypeOutput = DatasetType("test_ds_output", graph, storageClass)
        self.datasetTypeOutput2 = DatasetType("test_ds_output2", graph, storageClass)
        self.datasetTypeExtra = DatasetType("test_ds_extra", graph, storageClass)

        self.dataset_types: dict[str, DatasetType] = {}
        dataset_types = (
            self.datasetTypeInit,
            self.datasetTypeInput,
            self.datasetTypeOutput,
            self.datasetTypeOutput2,
            self.datasetTypeExtra,
        )
        for dataset_type in dataset_types:
            self.butler.registry.registerDatasetType(dataset_type)
            self.dataset_types[dataset_type.name] = dataset_type

        dataIds = [
            self.butler.registry.expandDataId(dict(instrument="Cam1", detector=detector_id))
            for detector_id in (1, 2, 3, 4)
        ]

        # make actual input datasets
        self.input_refs = [
            self.butler.put({"data": dataId["detector"]}, self.datasetTypeInput, dataId) for dataId in dataIds
        ]
        self.init_inputs_refs = [self.butler.put({"data": -1}, self.datasetTypeInit, dataIds[0])]
        self.all_input_refs = self.input_refs + self.init_inputs_refs

        # generate dataset refs for outputs
        self.output_refs = [DatasetRef(self.datasetTypeOutput, dataId, run="RUN") for dataId in dataIds]
        self.output_refs2 = [DatasetRef(self.datasetTypeOutput2, dataId, run="RUN") for dataId in dataIds]

        self.missing_refs = [DatasetRef(self.datasetTypeExtra, dataId, run="RUN") for dataId in dataIds]

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def make_quantum(self, step: int = 1) -> Quantum:
        """Make a Quantum which includes datastore records."""
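        # Step 1 models a quantum whose inputs already exist in the datastore;
        # step 2 models a downstream quantum that consumes step 1's outputs,
        # which have not been written yet when the quantum is constructed.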
        if step == 1:
            datastore_records = self.butler._datastore.export_records(self.all_input_refs)
            predictedInputs = {self.datasetTypeInput: self.input_refs}
            outputs = {self.datasetTypeOutput: self.output_refs}
            initInputs = {self.datasetTypeInit: self.init_inputs_refs[0]}
        elif step == 2:
            # The exported records should be empty because the outputs have not
            # been written yet; this just checks that the call works.
            datastore_records = self.butler._datastore.export_records(self.output_refs)
            predictedInputs = {self.datasetTypeInput: self.output_refs}
            outputs = {self.datasetTypeOutput2: self.output_refs2}
            initInputs = {}
        else:
            raise ValueError(f"unexpected {step} value")

        return Quantum(
            taskName="some.task.name",
            inputs=predictedInputs,
            outputs=outputs,
            initInputs=initInputs,
            datastore_records=datastore_records,
        )

    def test_initialize(self) -> None:
        """Test for initialize factory method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )
        self._test_factory(qbb)

    def test_initialize_repo_index(self) -> None:
        """Test for initialize using config file and repo index."""
        # Store config to a file.
        self.config.dumpToUri(self.root)

        butler_index = Config()
        butler_index["label"] = self.root
        with ResourcePath.temporary_uri(suffix=".yaml") as index_path:
            butler_index.dumpToUri(index_path)
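
            # The repository index maps labels to butler config locations;
            # pointing DAF_BUTLER_REPOSITORY_INDEX at it lets "label" stand in
            # for an explicit config below.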
            with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(index_path)}):
                quantum = self.make_quantum()
                qbb = QuantumBackedButler.initialize(
                    config="label",
                    quantum=quantum,
                    dimensions=self.universe,
                    dataset_types=self.dataset_types,
                )
                self._test_factory(qbb)

    def test_from_predicted(self) -> None:
        """Test for from_predicted factory method"""
        datastore_records = self.butler._datastore.export_records(self.all_input_refs)
        qbb = QuantumBackedButler.from_predicted(
            config=self.config,
            predicted_inputs=[ref.id for ref in self.all_input_refs],
            predicted_outputs=[ref.id for ref in self.output_refs],
            dimensions=self.universe,
            datastore_records=datastore_records,
            dataset_types=self.dataset_types,
        )
        self._test_factory(qbb)

    def _test_factory(self, qbb: QuantumBackedButler) -> None:
        """Test state immediately after construction."""
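        # A freshly constructed QBB should be writeable, know its predicted
        # inputs/outputs from the quantum, and have empty bookkeeping sets.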
        self.assertTrue(qbb.isWriteable())
        self.assertEqual(qbb._predicted_inputs, {ref.id for ref in self.all_input_refs})
        self.assertEqual(qbb._predicted_outputs, {ref.id for ref in self.output_refs})
        self.assertEqual(qbb._available_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._actual_output_refs, set())

    def test_getput(self) -> None:
        """Test for get/put methods"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Verify all input data are readable.
        for ref in self.input_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": -1})
        for ref in self.missing_refs:
            with self.assertRaises(FileNotFoundError):
                data = qbb.get(ref)

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs})

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back
        for ref in self.output_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        self.assertEqual(qbb._actual_output_refs, set(self.output_refs))

    def test_getDeferred(self) -> None:
        """Test for getDeferred method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # get some input data
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            data = qbb.getDeferred(ref)
            self.assertEqual(data.get(), {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDeferred(ref)
            self.assertEqual(data.get(), {"data": -1})
        for ref in self.missing_refs:
            data = qbb.getDeferred(ref)
            with self.assertRaises(FileNotFoundError):
                data.get()

        # Only the refs that were actually resolved count as available/actual
        # inputs; the missing refs are recorded as unavailable.
        self.assertEqual(qbb._available_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._actual_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs})

    def test_stored(self) -> None:
        """Test for dataset existence method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Check existence of some input data.
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            exists = qbb.stored(ref)
            self.assertTrue(exists)
        for ref in self.init_inputs_refs:
            exists = qbb.stored(ref)
            self.assertTrue(exists)
        for ref in self.missing_refs:
            exists = qbb.stored(ref)
            self.assertFalse(exists)

        # Now do the same checks in bulk.
        missing_set = set(self.missing_refs)
        refs = input_refs + self.init_inputs_refs + self.missing_refs
        stored_many = qbb.stored_many(refs)
        for ref, stored in stored_many.items():
            if ref in missing_set:
                self.assertFalse(stored)
            else:
                self.assertTrue(stored)

        # Existence checks update _available_inputs but do not mark anything
        # as an actual input, and missing refs are not flagged as unavailable.
        self.assertEqual(qbb._available_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())  # this is not consistent with get?

    def test_markInputUnused(self) -> None:
        """Test for markInputUnused method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Read all the input data.
        for ref in self.input_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": -1})

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)

        # Marking an input unused removes it from the actual inputs.
        qbb.markInputUnused(self.input_refs[0])
        self.assertEqual(qbb._actual_inputs, {ref.id for ref in self.input_refs[1:] + self.init_inputs_refs})

    def test_pruneDatasets(self) -> None:
        """Test for pruneDatasets methods"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back
        for ref in self.output_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        # Check for invalid arguments.
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without disassociate=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=False, unstore=True, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without unstore=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=False, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass disassociate=True without purge=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=True, purge=False)

        # Unstore only.
        ref = self.output_refs[0]
        qbb.pruneDatasets([ref], disassociate=False, unstore=True, purge=False)
        self.assertFalse(qbb.stored(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.get(ref)

        # Can store it again.
        qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.stored(ref))

        # Purge completely.
        ref = self.output_refs[1]
        qbb.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)
        self.assertFalse(qbb.stored(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.get(ref)
        qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.stored(ref))

    def test_extract_provenance_data(self) -> None:
        """Test for extract_provenance_data method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # read/store everything
        for ref in self.input_refs:
            qbb.get(ref)
        for ref in self.init_inputs_refs:
            qbb.get(ref)
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
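
        # Round-trip the provenance through JSON to check that serialization
        # preserves it.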
        provenance1 = qbb.extract_provenance_data()
        prov_json = provenance1.model_dump_json()
        provenance2 = QuantumProvenanceData.direct(**json.loads(prov_json))
        for provenance in (provenance1, provenance2):
            input_ids = {ref.id for ref in self.input_refs + self.init_inputs_refs}
            self.assertEqual(provenance.predicted_inputs, input_ids)
            self.assertEqual(provenance.available_inputs, input_ids)
            self.assertEqual(provenance.actual_inputs, input_ids)
            output_ids = {ref.id for ref in self.output_refs}
            self.assertEqual(provenance.predicted_outputs, output_ids)
            self.assertEqual(provenance.actual_outputs, output_ids)
            datastore_name = "FileDatastore@<butlerRoot>/datastore"
            self.assertEqual(set(provenance.datastore_records.keys()), {datastore_name})
            datastore_records = provenance.datastore_records[datastore_name]
            self.assertEqual(set(datastore_records.dataset_ids), output_ids)
            class_name = "lsst.daf.butler.datastore.stored_file_info.StoredFileInfo"
            self.assertEqual(set(datastore_records.records.keys()), {class_name})
            self.assertEqual(set(datastore_records.records[class_name].keys()), {id.hex for id in output_ids})
            table_name = "file_datastore_records"
            for dataset_data in datastore_records.records[class_name].values():
                self.assertEqual(set(dataset_data), {table_name})

    def test_collect_and_transfer(self) -> None:
        """Test for collect_and_transfer method"""
        quantum1 = self.make_quantum(1)
        qbb1 = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum1, dimensions=self.universe, dataset_types=self.dataset_types
        )

        quantum2 = self.make_quantum(2)
        qbb2 = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum2, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # read/store everything
        for ref in self.input_refs:
            qbb1.get(ref)
        for ref in self.init_inputs_refs:
            qbb1.get(ref)
        for ref in self.output_refs:
            qbb1.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        for ref in self.output_refs:
            qbb2.get(ref)
        for ref in self.output_refs2:
            qbb2.put({"data": cast(int, ref.dataId["detector"]) ** 3}, ref)
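
        # Collecting provenance from both quanta should transfer their outputs
        # and datastore records into the main butler.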
        QuantumProvenanceData.collect_and_transfer(
            self.butler,
            [quantum1, quantum2],
            [qbb1.extract_provenance_data(), qbb2.extract_provenance_data()],
        )

        for ref in self.output_refs:
            data = self.butler.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        for ref in self.output_refs2:
            data = self.butler.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 3})


if __name__ == "__main__":
    unittest.main()