Coverage for tests/test_quantumBackedButler.py: 7%
233 statements
« prev ^ index » next — coverage.py v7.3.1, created at 2023-10-02 08:00 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28import json
29import os
30import unittest
31import unittest.mock
32from typing import cast
34from lsst.daf.butler import (
35 Butler,
36 Config,
37 DatasetRef,
38 DatasetType,
39 DimensionUniverse,
40 Quantum,
41 QuantumBackedButler,
42 QuantumProvenanceData,
43 RegistryConfig,
44 StorageClass,
45)
46from lsst.daf.butler.registry import _RegistryFactory
47from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
48from lsst.resources import ResourcePath
# Absolute path to the directory containing this test file; used to locate
# test data (``data/registry/base.yaml``) and to root temporary repos.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
class QuantumBackedButlerTestCase(unittest.TestCase):
    """Test case for QuantumBackedButler.

    Builds a small temporary butler repository with a handful of dataset
    types over (instrument, detector) data IDs, then exercises the
    registry-free quantum-backed butler against it.
    """

    def setUp(self) -> None:
        """Create a temporary repo, store input datasets, and predict
        output and intentionally-missing dataset refs.
        """
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config()
        self.config["root"] = self.root
        self.universe = DimensionUniverse()

        # Make a butler and import dimension definitions.
        registryConfig = RegistryConfig(self.config.get("registry"))
        _RegistryFactory(registryConfig).create_from_config(butlerRoot=self.root)
        self.butler = Butler(self.config, writeable=True, run="RUN")
        self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Make all dataset types used by the tests; they share a single
        # storage class and the (instrument, detector) dimension graph.
        graph = self.universe.extract(("instrument", "detector"))
        storageClass = StorageClass("StructuredDataDict")
        self.datasetTypeInit = DatasetType("test_ds_init", graph, storageClass)
        self.datasetTypeInput = DatasetType("test_ds_input", graph, storageClass)
        self.datasetTypeOutput = DatasetType("test_ds_output", graph, storageClass)
        self.datasetTypeOutput2 = DatasetType("test_ds_output2", graph, storageClass)
        self.datasetTypeExtra = DatasetType("test_ds_extra", graph, storageClass)

        self.dataset_types: dict[str, DatasetType] = {}
        dataset_types = (
            self.datasetTypeInit,
            self.datasetTypeInput,
            self.datasetTypeOutput,
            self.datasetTypeOutput2,
            self.datasetTypeExtra,
        )
        for dataset_type in dataset_types:
            self.butler.registry.registerDatasetType(dataset_type)
            self.dataset_types[dataset_type.name] = dataset_type

        dataIds = [
            self.butler.registry.expandDataId(dict(instrument="Cam1", detector=detector_id))
            for detector_id in (1, 2, 3, 4)
        ]

        # Make actual input datasets; each payload encodes its detector ID.
        self.input_refs = [
            self.butler.put({"data": dataId["detector"]}, self.datasetTypeInput, dataId) for dataId in dataIds
        ]
        self.init_inputs_refs = [self.butler.put({"data": -1}, self.datasetTypeInit, dataIds[0])]
        self.all_input_refs = self.input_refs + self.init_inputs_refs

        # Generate dataset refs for outputs; these are predicted but not
        # stored anywhere yet.
        self.output_refs = [DatasetRef(self.datasetTypeOutput, dataId, run="RUN") for dataId in dataIds]
        self.output_refs2 = [DatasetRef(self.datasetTypeOutput2, dataId, run="RUN") for dataId in dataIds]

        # Refs of a registered type that were never stored, for testing
        # missing-dataset behavior.
        self.missing_refs = [DatasetRef(self.datasetTypeExtra, dataId, run="RUN") for dataId in dataIds]

    def tearDown(self) -> None:
        """Remove the temporary repository."""
        removeTestTempDir(self.root)

    def make_quantum(self, step: int = 1) -> Quantum:
        """Make a Quantum which includes datastore records.

        Parameters
        ----------
        step : `int`, optional
            Which stage to build: 1 uses the actually-stored inputs;
            2 uses the (not-yet-existing) step-1 outputs as inputs.

        Raises
        ------
        ValueError
            Raised if ``step`` is not 1 or 2.
        """
        if step == 1:
            datastore_records = self.butler._datastore.export_records(self.all_input_refs)
            predictedInputs = {self.datasetTypeInput: self.input_refs}
            outputs = {self.datasetTypeOutput: self.output_refs}
            initInputs = {self.datasetTypeInit: self.init_inputs_refs[0]}
        elif step == 2:
            # The result should be empty, this is just to test that it works.
            datastore_records = self.butler._datastore.export_records(self.output_refs)
            predictedInputs = {self.datasetTypeInput: self.output_refs}
            outputs = {self.datasetTypeOutput2: self.output_refs2}
            initInputs = {}
        else:
            raise ValueError(f"unexpected {step} value")

        return Quantum(
            taskName="some.task.name",
            inputs=predictedInputs,
            outputs=outputs,
            initInputs=initInputs,
            datastore_records=datastore_records,
        )

    def test_initialize(self) -> None:
        """Test for initialize factory method."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )
        self._test_factory(qbb)

    def test_initialize_repo_index(self) -> None:
        """Test for initialize using config file and repo index."""
        # Store config to a file.
        self.config.dumpToUri(self.root)

        # Build a repository index mapping the label "label" to the repo
        # root and point the butler at it through the environment.
        butler_index = Config()
        butler_index["label"] = self.root
        with ResourcePath.temporary_uri(suffix=".yaml") as index_path:
            butler_index.dumpToUri(index_path)

            with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(index_path)}):
                quantum = self.make_quantum()
                qbb = QuantumBackedButler.initialize(
                    config="label",
                    quantum=quantum,
                    dimensions=self.universe,
                    dataset_types=self.dataset_types,
                )
                self._test_factory(qbb)

    def test_from_predicted(self) -> None:
        """Test for from_predicted factory method."""
        datastore_records = self.butler._datastore.export_records(self.all_input_refs)
        qbb = QuantumBackedButler.from_predicted(
            config=self.config,
            predicted_inputs=[ref.id for ref in self.all_input_refs],
            predicted_outputs=[ref.id for ref in self.output_refs],
            dimensions=self.universe,
            datastore_records=datastore_records,
            dataset_types=self.dataset_types,
        )
        self._test_factory(qbb)

    def _test_factory(self, qbb: QuantumBackedButler) -> None:
        """Test state immediately after construction."""
        self.assertTrue(qbb.isWriteable())
        self.assertEqual(qbb._predicted_inputs, {ref.id for ref in self.all_input_refs})
        self.assertEqual(qbb._predicted_outputs, {ref.id for ref in self.output_refs})
        # Nothing has been read or written yet, so all tracking sets start
        # out empty.
        self.assertEqual(qbb._available_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._actual_output_refs, set())

    def test_getput(self) -> None:
        """Test for getDirect/putDirect methods."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Verify all input data are readable.
        for ref in self.input_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": -1})
        for ref in self.missing_refs:
            with self.assertRaises(FileNotFoundError):
                data = qbb.get(ref)

        # Every predicted input was read; the missing refs were recorded
        # as unavailable.
        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs})

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back
        for ref in self.output_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        self.assertEqual(qbb._actual_output_refs, set(self.output_refs))

    def test_getDeferred(self) -> None:
        """Test for getDirectDeferred method."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Get some (not all) input data via deferred handles.
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            data = qbb.getDeferred(ref)
            self.assertEqual(data.get(), {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDeferred(ref)
            self.assertEqual(data.get(), {"data": -1})
        for ref in self.missing_refs:
            data = qbb.getDeferred(ref)
            with self.assertRaises(FileNotFoundError):
                data.get()

        # Only the refs actually fetched above are tracked as available /
        # actual inputs; the unfetched input refs are not.
        self.assertEqual(qbb._available_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._actual_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs})

    def test_datasetExistsDirect(self) -> None:
        """Test for dataset existence method."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Check existence of some (not all) input data.
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            exists = qbb.stored(ref)
            self.assertTrue(exists)
        for ref in self.init_inputs_refs:
            exists = qbb.stored(ref)
            self.assertTrue(exists)
        for ref in self.missing_refs:
            exists = qbb.stored(ref)
            self.assertFalse(exists)

        # Now do the same checks in bulk.
        missing_set = set(self.missing_refs)
        refs = input_refs + self.init_inputs_refs + self.missing_refs
        stored_many = qbb.stored_many(refs)
        for ref, stored in stored_many.items():
            if ref in missing_set:
                self.assertFalse(stored)
            else:
                self.assertTrue(stored)

        # stored() marks checked refs as available but does not record them
        # as actual inputs.
        self.assertEqual(qbb._available_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())  # this is not consistent with getDirect?

    def test_markInputUnused(self) -> None:
        """Test for markInputUnused method."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Read all input data so everything is tracked as an actual input.
        for ref in self.input_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": -1})

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)

        # Marking one input unused should remove only that ref from the
        # actual-input set.
        qbb.markInputUnused(self.input_refs[0])
        self.assertEqual(qbb._actual_inputs, {ref.id for ref in self.input_refs[1:] + self.init_inputs_refs})

    def test_pruneDatasets(self) -> None:
        """Test for pruneDatasets methods."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back
        for ref in self.output_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        # Check for invalid arguments.
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without disassociate=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=False, unstore=True, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without unstore=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=False, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass disassociate=True without purge=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=True, purge=False)

        # Unstore only (no disassociate, no purge).
        ref = self.output_refs[0]
        qbb.pruneDatasets([ref], disassociate=False, unstore=True, purge=False)
        self.assertFalse(qbb.stored(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.get(ref)

        # can store it again
        qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.stored(ref))

        # Purge completely.
        ref = self.output_refs[1]
        qbb.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)
        self.assertFalse(qbb.stored(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.get(ref)
        qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.stored(ref))

    def test_extract_provenance_data(self) -> None:
        """Test for extract_provenance_data method."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Read every input and store every output so provenance is full.
        for ref in self.input_refs:
            qbb.get(ref)
        for ref in self.init_inputs_refs:
            qbb.get(ref)
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Check both the direct object and a JSON round-trip of it.
        provenance1 = qbb.extract_provenance_data()
        prov_json = provenance1.json()
        provenance2 = QuantumProvenanceData.direct(**json.loads(prov_json))
        for provenance in (provenance1, provenance2):
            input_ids = {ref.id for ref in self.input_refs + self.init_inputs_refs}
            self.assertEqual(provenance.predicted_inputs, input_ids)
            self.assertEqual(provenance.available_inputs, input_ids)
            self.assertEqual(provenance.actual_inputs, input_ids)
            output_ids = {ref.id for ref in self.output_refs}
            self.assertEqual(provenance.predicted_outputs, output_ids)
            self.assertEqual(provenance.actual_outputs, output_ids)
            # Datastore records should cover exactly the stored outputs,
            # keyed by datastore name / record class / table name.
            datastore_name = "FileDatastore@<butlerRoot>/datastore"
            self.assertEqual(set(provenance.datastore_records.keys()), {datastore_name})
            datastore_records = provenance.datastore_records[datastore_name]
            self.assertEqual(set(datastore_records.dataset_ids), output_ids)
            class_name = "lsst.daf.butler.core.storedFileInfo.StoredFileInfo"
            table_name = "file_datastore_records"
            self.assertEqual(set(datastore_records.records.keys()), {class_name})
            self.assertEqual(set(datastore_records.records[class_name].keys()), {table_name})
            self.assertEqual(
                {record["dataset_id"] for record in datastore_records.records[class_name][table_name]},
                output_ids,
            )

    def test_collect_and_transfer(self) -> None:
        """Test for collect_and_transfer method."""
        # Two chained quanta: quantum2 consumes quantum1's outputs.
        quantum1 = self.make_quantum(1)
        qbb1 = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum1, dimensions=self.universe, dataset_types=self.dataset_types
        )

        quantum2 = self.make_quantum(2)
        qbb2 = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum2, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Run "step 1": read inputs, write squared outputs.
        for ref in self.input_refs:
            qbb1.get(ref)
        for ref in self.init_inputs_refs:
            qbb1.get(ref)
        for ref in self.output_refs:
            qbb1.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Run "step 2": read step-1 outputs, write cubed outputs.
        for ref in self.output_refs:
            qbb2.get(ref)
        for ref in self.output_refs2:
            qbb2.put({"data": cast(int, ref.dataId["detector"]) ** 3}, ref)

        QuantumProvenanceData.collect_and_transfer(
            self.butler,
            [quantum1, quantum2],
            [qbb1.extract_provenance_data(), qbb2.extract_provenance_data()],
        )

        # Both quanta's outputs must now be readable through the full butler.
        for ref in self.output_refs:
            data = self.butler.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        for ref in self.output_refs2:
            data = self.butler.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 3})
if __name__ == "__main__":
    # Allow running this test module directly.
    unittest.main()