# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable
from typing import Any, cast

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NullDatastore,
    StorageClass,
    StorageClassFactory,
    StoredFileInfo,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    """Make example dataset that can be stored in butler."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
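
# The three positional arguments appear to map to the MetricsExample summary,
# output and data attributes; the list is the sliceable "data" component that
# the component and read-parameter tests below rely on.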


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis that might
    otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing."""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
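
    # NOTE: concrete subclasses are expected to define ``configFile`` (read in
    # setUpClass above) along with capability flags such as ``uriScheme`` and
    # ``ingestTransferModes`` that individual tests consult.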


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False

    def testConfigRoot(self) -> None:
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self) -> None:
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that parameters are validated."""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self) -> None:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)
            storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V", "band": "v"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that registry knows nothing about."""
        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return
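
        # NOTE: datastores without a ``trustGetRequest`` attribute (the
        # ephemeral in-memory datastore, for example) return here, so for
        # them the test passes silently rather than reporting a skip.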

        metrics = makeExampleMetrics()

        storage_class_names = ("StructuredDataNoComponents", "StructuredData", "StructuredComposite")
        for i, sc_name in enumerate(storage_class_names, start=1):
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            disassembled = sc_name == "StructuredComposite"

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry so now we are trusting
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make a new dataset ref with a compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str, ref: DatasetRef = ref) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name!r}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with the original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self) -> None:
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore | DatasetRef, ...]:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
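
    # ``prepDeleteTest`` returns a flat tuple, so callers unpack it as
    # ``datastore, ref = self.prepDeleteTest()`` or
    # ``datastore, *refs = self.prepDeleteTest(n_refs=10)`` (see testTrash).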

    def testRemove(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self) -> None:
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"} for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V", "band": "v"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V", "band": "v"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(
        self, func: Callable[[MetricsExample, str, DatasetRef], None], expectOutput: bool = True
    ) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files outside of datastore root unless
                    auto.
                    """
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file by transferring it to the template
                    location.
                    """
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for datastore_name in datastore.names:
            if not datastore_name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")
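
        # The ``else`` clause above runs only if the loop completed without
        # ``break``, i.e. only when every name in ``datastore.names`` starts
        # with "InMemoryDatastore"; a single file-backed datastore in the
        # chain is enough to keep the test running.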

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self) -> None:
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works, include non-existing dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with dict()
        constructor.
        """
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))
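
    # The "A" and "B" storage class variants are assumed to serialize the same
    # Python type through two different paths; the final assertion verifies
    # that both round-trip to equal objects.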


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self) -> None:
        """Test that we write to a temporary and then rename"""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self) -> None:
        """Verify that the expected exception is raised if the FileDatastore
        can not determine the put formatter location.
        """
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId)

        def raiser(ref: DatasetRef) -> None:
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)

    def test_roots(self) -> None:
        datastore = self.makeDatastore()

        self.assertEqual(set(datastore.names), set(datastore.roots.keys()))
        for root in datastore.roots.values():
            if root is not None:
                self.assertTrue(root.exists())


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self) -> None:
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self) -> None:
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record and trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for _ in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Test that the datastore cleans up on failure."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self) -> None:
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails but leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(RuntimeError):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self) -> None:
        """Test constraints model. Assumes that each test class has the
        same constraints.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept it.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self) -> None:
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                            # Ephemeral currently means InMemory, and that
                            # does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self) -> None:
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)
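
    # Each test below builds its manager from an inline YAML string, so the
    # cache policy (root, default flag, per-dataset-type cacheable flags,
    # expiry) can be varied per test without extra config files.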

    def testNoCacheDir(self) -> None:
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self) -> None:
        """Use default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self) -> None:
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
"""

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # The environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # Now a config with no explicit cache directory.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
"""
        cache_manager = self._make_cache_manager(config_str)

        # With no root configured, this environment variable should now
        # supply the cache directory.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set, the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # Use the API to set the fallback directory, making sure that the
        # environment variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))
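
    # Taken together, the checks above exercise this precedence for choosing
    # the cache directory (a summary inferred from these tests, not an
    # authoritative statement of the API):
    #
    #   1. The DAF_BUTLER_CACHE_DIRECTORY environment variable.
    #   2. An explicit root in the cache configuration.
    #   3. DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET, or the equivalent fallback set
    #      via DatastoreCacheManager.set_fallback_cache_directory_if_unset().
    #   4. A temporary directory created on demand (see testNoCacheDir).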

    def testExplicitCacheDir(self) -> None:
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
"""
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check that we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary.
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager: DatastoreCacheManager) -> None:
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in the cache using the ref alone and then using the
        # file extension as well.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # The cached file should no longer exist at its original location but
        # the uncached file should be unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in the cache.
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self) -> None:
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs, strict=True):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
"""
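
    # For reference, _expiration_config("files", 2) renders to the following
    # YAML:
    #
    #   cached:
    #     default: true
    #     expiry:
    #       mode: files
    #       threshold: 2
    #     cacheable:
    #       unused: true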

    def testCacheExpiryFiles(self) -> None:
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        # Check that an empty cache returns unknown for an arbitrary ref.
        self.assertFalse(cache_manager.known_to_cache(self.refs[0]))

        # Should end with datasets: 2, 3, 4.
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5.
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # The cache should still report the expected file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add an additional entry to the cache.
            # Should now have: 4, 5, 6.
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside the context the file should no longer exist.
        self.assertFalse(found.exists())

        # The file count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory, but because hard links are
        # used it was removed from the main cache during the expiry above and
        # so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache triggers expiry again; dataset 2
        # remains absent.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)
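
    # The sequence above mirrors how a datastore client would use the cache
    # manager; a minimal sketch based only on the calls exercised in this
    # file (``ref``, ``local_file`` and ``extension`` are placeholders):
    #
    #   if cache_manager.should_be_cached(ref):
    #       cached_uri = cache_manager.move_to_cache(local_file, ref)
    #   ...
    #   with cache_manager.find_in_cache(ref, extension) as cached:
    #       if cached is not None:
    #           payload = cached.read()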

    def testCacheExpiryDatasets(self) -> None:
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self) -> None:
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i], strict=True):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
                self.assertTrue(cache_manager.known_to_cache(component_ref))
                self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
                self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))

        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets, each of 3 files.

        # Write two new non-composite datasets; the number of files should
        # drop.
        self.assertExpiration(cache_manager, 2, 5)
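
    # In "datasets" mode the component files of a composite are expired
    # together: three composites of three files each with a two-dataset
    # threshold leave 2 x 3 = 6 files, and writing two single-file datasets
    # afterwards expires one more composite, leaving 2 + 3 = 5 files.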

    def testCacheExpirySize(self) -> None:
        threshold = 55  # In bytes; each test file is 10 bytes.
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))
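
    # Arithmetic check: ten 10-byte files against a 55-byte threshold retain
    # six files (60 bytes), not five. Expiry evidently runs before each new
    # file is added, trimming the cache to at most 55 bytes and then adding
    # one more file.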

    def assertExpiration(
        self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int
    ) -> None:
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)
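
    # Worked example: with n_datasets=5 and n_retained=3, refs 0 and 1 are
    # expired (oldest first) while refs 2, 3 and 4 are still found.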

    def testCacheExpiryAge(self) -> None:
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep past the age threshold, then insert
        # 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting.
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should remain.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)
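

# Taken together, these tests exercise the four expiry modes accepted by the
# cache configuration, as used here: "files" (maximum number of cached files),
# "datasets" (maximum number of datasets, composites counted once), "size"
# (maximum total bytes) and "age" (maximum file age in seconds).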


class NullDatastoreTestCase(DatasetTestHelper, unittest.TestCase):
    """Test the null datastore."""

    storageClassFactory = StorageClassFactory()

    def test_basics(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})

        null = NullDatastore(None, None)

        self.assertFalse(null.exists(ref))
        self.assertFalse(null.knows(ref))
        knows = null.knows_these([ref])
        self.assertFalse(knows[ref])
        null.validateConfiguration(ref)

        with self.assertRaises(FileNotFoundError):
            null.get(ref)
        with self.assertRaises(NotImplementedError):
            null.put("", ref)
        with self.assertRaises(FileNotFoundError):
            null.getURI(ref)
        with self.assertRaises(FileNotFoundError):
            null.getURIs(ref)
        with self.assertRaises(FileNotFoundError):
            null.getManyURIs([ref])
        with self.assertRaises(NotImplementedError):
            null.getLookupKeys()
        with self.assertRaises(NotImplementedError):
            null.import_records({})
        with self.assertRaises(NotImplementedError):
            null.export_records([])
        with self.assertRaises(NotImplementedError):
            null.export([ref])
        with self.assertRaises(NotImplementedError):
            null.transfer(null, ref)
        with self.assertRaises(NotImplementedError):
            null.emptyTrash()
        with self.assertRaises(NotImplementedError):
            null.trash(ref)
        with self.assertRaises(NotImplementedError):
            null.forget([ref])
        with self.assertRaises(NotImplementedError):
            null.remove(ref)
        with self.assertRaises(NotImplementedError):
            null.retrieveArtifacts([ref], ResourcePath("."))
        with self.assertRaises(NotImplementedError):
            null.transfer_from(null, [ref])
        with self.assertRaises(NotImplementedError):
            null.ingest()
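
    # Note the pattern: lookups that could legitimately miss (get, getURI,
    # getURIs, getManyURIs) raise FileNotFoundError, while every operation
    # that would modify or administer the datastore raises
    # NotImplementedError.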


class DatasetRefURIsTestCase(unittest.TestCase):
    """Tests for DatasetRefURIs."""

    def testSequenceAccess(self) -> None:
        """Verify that DatasetRefURIs can be treated like a two-item tuple."""
        uris = DatasetRefURIs()

        self.assertEqual(len(uris), 2)
        self.assertEqual(uris[0], None)
        self.assertEqual(uris[1], {})

        primaryURI = ResourcePath("1/2/3")
        componentURI = ResourcePath("a/b/c")

        # Affirm that DatasetRefURIs does not support MutableSequence item
        # assignment.
        with self.assertRaises(TypeError):
            uris[0] = primaryURI
        with self.assertRaises(TypeError):
            uris[1] = {"foo": componentURI}

        # But the URIs can be set by property name:
        uris.primaryURI = primaryURI
        uris.componentURIs = {"foo": componentURI}
        self.assertEqual(uris.primaryURI, primaryURI)
        self.assertEqual(uris[0], primaryURI)

        primary, components = uris
        self.assertEqual(primary, primaryURI)
        self.assertEqual(components, {"foo": componentURI})

    def testRepr(self) -> None:
        """Verify __repr__ output."""
        uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")})
        self.assertEqual(
            repr(uris),
            'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})',
        )


class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase):
    """Test the StoredFileInfo class."""

    storageClassFactory = StorageClassFactory()

    def test_StoredFileInfo(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})

        record = dict(
            storage_class="StructuredDataDict",
            formatter="lsst.daf.butler.Formatter",
            path="a/b/c.txt",
            component="component",
            dataset_id=ref.id,
            checksum=None,
            file_size=5,
        )
        info = StoredFileInfo.from_record(record)

        self.assertEqual(info.dataset_id, ref.id)
        self.assertEqual(info.to_record(), record)

        ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})
        rebased = info.rebase(ref2)
        self.assertEqual(rebased.dataset_id, ref2.id)
        self.assertEqual(rebased.rebase(ref), info)

        with self.assertRaises(TypeError):
            rebased.update(formatter=42)

        with self.assertRaises(ValueError):
            rebased.update(something=42, new="42")

        # Check that pickle works on StoredFileInfo.
        pickled_info = pickle.dumps(info)
        unpickled_info = pickle.loads(pickled_info)
        self.assertEqual(unpickled_info, info)
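
        # Taken together, the assertions above establish three round trips:
        # record -> info -> record via from_record/to_record, ref -> ref2 ->
        # ref via rebase, and object -> bytes -> object via pickle.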


if __name__ == "__main__":
    unittest.main()