# Coverage residue (from coverage.py v6.4.2 HTML report, 2022-08-04 02:20 -0700):
# tests/test_quantumBackedButler.py — 7% of 207 statements covered.
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22import json
23import os
24import unittest
25import uuid
27from lsst.daf.butler import (
28 Butler,
29 Config,
30 DatasetRef,
31 DatasetType,
32 DimensionUniverse,
33 Quantum,
34 QuantumBackedButler,
35 QuantumProvenanceData,
36 Registry,
37 RegistryConfig,
38 StorageClass,
39)
40from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
# Absolute path to the directory containing this test module; used to locate
# the test data files imported in setUp (data/registry/base.yaml).
TESTDIR = os.path.abspath(os.path.dirname(__file__))
class QuantumBackedButlerTestCase(unittest.TestCase):
    """Test case for QuantumBackedButler.

    Builds a real file-backed Butler with a handful of dataset types and
    detector data IDs, then exercises QuantumBackedButler against quanta
    constructed from those datasets.
    """

    def setUp(self):
        # Fresh temporary butler root for every test; removed in tearDown.
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config()
        self.config["root"] = self.root
        self.universe = DimensionUniverse()

        # Make a butler and import dimension definitions.
        registryConfig = RegistryConfig(self.config.get("registry"))
        Registry.createFromConfig(registryConfig, butlerRoot=self.root)
        self.butler = Butler(self.config, writeable=True, run="RUN")
        self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Make all dataset types used by the tests; all share one storage
        # class and the (instrument, detector) dimension graph.
        graph = self.universe.extract(("instrument", "detector"))
        storageClass = StorageClass("StructuredDataDict")
        self.datasetTypeInit = DatasetType("test_ds_init", graph, storageClass)
        self.datasetTypeInput = DatasetType("test_ds_input", graph, storageClass)
        self.datasetTypeOutput = DatasetType("test_ds_output", graph, storageClass)
        self.datasetTypeOutput2 = DatasetType("test_ds_output2", graph, storageClass)
        self.datasetTypeExtra = DatasetType("test_ds_extra", graph, storageClass)
        self.butler.registry.registerDatasetType(self.datasetTypeInit)
        self.butler.registry.registerDatasetType(self.datasetTypeInput)
        self.butler.registry.registerDatasetType(self.datasetTypeOutput)
        self.butler.registry.registerDatasetType(self.datasetTypeOutput2)
        self.butler.registry.registerDatasetType(self.datasetTypeExtra)

        # One expanded data ID per detector.
        dataIds = [
            self.butler.registry.expandDataId(dict(instrument="Cam1", detector=detector_id))
            for detector_id in (1, 2, 3, 4)
        ]

        # Make actual input datasets (stored via the full butler).
        self.input_refs = [
            self.butler.put({"data": dataId["detector"]}, self.datasetTypeInput, dataId) for dataId in dataIds
        ]
        self.init_inputs_refs = [self.butler.put({"data": -1}, self.datasetTypeInit, dataIds[0])]

        # Generate dataset refs for outputs; these are predicted only and
        # have no stored data yet.
        self.output_refs = [
            DatasetRef(self.datasetTypeOutput, dataId, id=uuid.uuid4(), run="RUN") for dataId in dataIds
        ]
        self.output_refs2 = [
            DatasetRef(self.datasetTypeOutput2, dataId, id=uuid.uuid4(), run="RUN") for dataId in dataIds
        ]

        # Refs whose datasets are never stored, for "missing" cases.
        self.missing_refs = [
            DatasetRef(self.datasetTypeExtra, dataId, id=uuid.uuid4(), run="RUN") for dataId in dataIds
        ]

    def tearDown(self):
        removeTestTempDir(self.root)

    def make_quantum(self, step: int = 1) -> Quantum:
        """Make a Quantum which includes datastore records.

        Parameters
        ----------
        step : `int`
            Which simulated pipeline step to build a quantum for: step 1
            consumes the stored inputs and predicts ``output_refs``; step 2
            consumes step 1's outputs and predicts ``output_refs2``.

        Raises
        ------
        ValueError
            Raised if ``step`` is not 1 or 2.
        """
        if step == 1:
            datastore_records = self.butler.datastore.export_records(self.input_refs + self.init_inputs_refs)
            predictedInputs = {self.datasetTypeInput: self.input_refs}
            outputs = {self.datasetTypeOutput: self.output_refs}
            initInputs = {self.datasetTypeInit: self.init_inputs_refs}
        elif step == 2:
            # The result should be empty, this is just to test that it works.
            datastore_records = self.butler.datastore.export_records(self.output_refs)
            predictedInputs = {self.datasetTypeInput: self.output_refs}
            outputs = {self.datasetTypeOutput2: self.output_refs2}
            initInputs = {}
        else:
            # Previously an unknown step fell through to a NameError on the
            # unbound locals below; fail with a clear message instead.
            raise ValueError(f"Unexpected step value: {step}")

        return Quantum(
            taskName="some.task.name",
            inputs=predictedInputs,
            outputs=outputs,
            initInputs=initInputs,
            datastore_records=datastore_records,
        )

    def test_initialize(self):
        """Test for initialize method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # Check state after initialize: predicted sets are filled from the
        # quantum, all bookkeeping sets start empty.
        self.assertTrue(qbb.isWriteable())
        self.assertEqual(qbb._predicted_inputs, {ref.id for ref in self.input_refs})
        self.assertEqual(qbb._predicted_outputs, {ref.id for ref in self.output_refs})
        self.assertEqual(qbb._available_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._actual_output_refs, set())

    def test_getPutDirect(self):
        """Test for getDirect/putDirect methods"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # Verify all input data are readable.
        for ref in self.input_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": -1})
        for ref in self.missing_refs:
            with self.assertRaises(FileNotFoundError):
                data = qbb.getDirect(ref)

        # Successful reads mark inputs both available and actual; the failed
        # reads are recorded as unavailable.
        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs})

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.putDirect({"data": ref.dataId["detector"] ** 2}, ref)

        # Must be able to read them back
        for ref in self.output_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"] ** 2})

        self.assertEqual(qbb._actual_output_refs, set(self.output_refs))

    def test_getDirectDeferred(self):
        """Test for getDirectDeferred method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # get some input data
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            data = qbb.getDirectDeferred(ref)
            self.assertEqual(data.get(), {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDirectDeferred(ref)
            self.assertEqual(data.get(), {"data": -1})
        for ref in self.missing_refs:
            data = qbb.getDirectDeferred(ref)
            with self.assertRaises(FileNotFoundError):
                data.get()

        # Only the refs actually resolved above count as available/actual;
        # the missing refs were recorded as unavailable.
        self.assertEqual(qbb._available_inputs, {ref.id for ref in input_refs})
        self.assertEqual(qbb._actual_inputs, {ref.id for ref in input_refs})
        self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs})

    def test_datasetExistsDirect(self):
        """Test for datasetExistsDirect method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # get some input data
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            exists = qbb.datasetExistsDirect(ref)
            self.assertTrue(exists)
        for ref in self.init_inputs_refs:
            exists = qbb.datasetExistsDirect(ref)
            self.assertTrue(exists)
        for ref in self.missing_refs:
            exists = qbb.datasetExistsDirect(ref)
            self.assertFalse(exists)

        # Existence checks mark inputs available but NOT actual, and do not
        # record missing refs as unavailable.
        self.assertEqual(qbb._available_inputs, {ref.id for ref in input_refs})
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())  # this is not consistent with getDirect?

    def test_markInputUnused(self):
        """Test for markInputUnused method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # get some input data
        for ref in self.input_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": -1})

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)

        # Marking an input unused removes it from the actual-inputs set only.
        qbb.markInputUnused(self.input_refs[0])
        self.assertEqual(qbb._actual_inputs, {ref.id for ref in self.input_refs[1:]})

    def test_pruneDatasets(self):
        """Test for pruneDatasets methods"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.putDirect({"data": ref.dataId["detector"] ** 2}, ref)

        # Must be able to read them back
        for ref in self.output_refs:
            data = qbb.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"] ** 2})

        # Check for invalid arguments.
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without disassociate=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=False, unstore=True, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without unstore=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=False, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass disassociate=True without purge=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=True, purge=False)

        # Unstore only (disassociate=False): dataset is gone from the
        # datastore but may be stored again.
        ref = self.output_refs[0]
        qbb.pruneDatasets([ref], disassociate=False, unstore=True, purge=False)
        self.assertFalse(qbb.datasetExistsDirect(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.getDirect(ref)

        # can store it again
        qbb.putDirect({"data": ref.dataId["detector"] ** 2}, ref)
        self.assertTrue(qbb.datasetExistsDirect(ref))

        # Purge completely.
        ref = self.output_refs[1]
        qbb.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)
        self.assertFalse(qbb.datasetExistsDirect(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.getDirect(ref)
        qbb.putDirect({"data": ref.dataId["detector"] ** 2}, ref)
        self.assertTrue(qbb.datasetExistsDirect(ref))

    def test_extract_provenance_data(self):
        """Test for extract_provenance_data method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(config=self.config, quantum=quantum, dimensions=self.universe)

        # read/store everything
        for ref in self.input_refs:
            qbb.getDirect(ref)
        for ref in self.init_inputs_refs:
            qbb.getDirect(ref)
        for ref in self.output_refs:
            qbb.putDirect({"data": ref.dataId["detector"] ** 2}, ref)

        # Check the provenance both directly and after a JSON round trip.
        provenance1 = qbb.extract_provenance_data()
        prov_json = provenance1.json()
        provenance2 = QuantumProvenanceData.direct(**json.loads(prov_json))
        for provenance in (provenance1, provenance2):
            input_ids = {ref.id for ref in self.input_refs}
            self.assertEqual(provenance.predicted_inputs, input_ids)
            self.assertEqual(provenance.available_inputs, input_ids)
            self.assertEqual(provenance.actual_inputs, input_ids)
            output_ids = {ref.id for ref in self.output_refs}
            self.assertEqual(provenance.predicted_outputs, output_ids)
            self.assertEqual(provenance.actual_outputs, output_ids)
            datastore_name = "FileDatastore@<butlerRoot>/datastore"
            self.assertEqual(set(provenance.datastore_records.keys()), {datastore_name})
            datastore_records = provenance.datastore_records[datastore_name]
            self.assertEqual(set(datastore_records.dataset_ids), output_ids)
            class_name = "lsst.daf.butler.core.storedFileInfo.StoredFileInfo"
            table_name = "file_datastore_records"
            self.assertEqual(set(datastore_records.records.keys()), {class_name})
            self.assertEqual(set(datastore_records.records[class_name].keys()), {table_name})
            self.assertEqual(
                {record["dataset_id"] for record in datastore_records.records[class_name][table_name]},
                output_ids,
            )

    def test_collect_and_transfer(self):
        """Test for collect_and_transfer method"""
        quantum1 = self.make_quantum(1)
        qbb1 = QuantumBackedButler.initialize(config=self.config, quantum=quantum1, dimensions=self.universe)

        quantum2 = self.make_quantum(2)
        qbb2 = QuantumBackedButler.initialize(config=self.config, quantum=quantum2, dimensions=self.universe)

        # read/store everything
        for ref in self.input_refs:
            qbb1.getDirect(ref)
        for ref in self.init_inputs_refs:
            qbb1.getDirect(ref)
        for ref in self.output_refs:
            qbb1.putDirect({"data": ref.dataId["detector"] ** 2}, ref)

        # Step 2 consumes step 1's outputs.
        for ref in self.output_refs:
            qbb2.getDirect(ref)
        for ref in self.output_refs2:
            qbb2.putDirect({"data": ref.dataId["detector"] ** 3}, ref)

        QuantumProvenanceData.collect_and_transfer(
            self.butler,
            [quantum1, quantum2],
            [qbb1.extract_provenance_data(), qbb2.extract_provenance_data()],
        )

        # After the transfer both steps' outputs are readable via the full
        # butler.
        for ref in self.output_refs:
            data = self.butler.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"] ** 2})

        for ref in self.output_refs2:
            data = self.butler.getDirect(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"] ** 3})
# Allow running this test module directly (coverage-report annotation text
# that had fused into this line has been removed).
if __name__ == "__main__":
    unittest.main()