# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
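
"""Tests for the daf_butler Datastore implementations."""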

from __future__ import annotations

import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable
from typing import Any, cast

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    StorageClass,
    StorageClassFactory,
    StoredFileInfo,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    """Make an example dataset that can be stored in the butler."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
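
# The helper above builds a composite whose components ("summary", "output"
# and "data") are read back individually in the tests below; the numeric
# array becomes the ``data`` component.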


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing"""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False
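
    # These flags are overridden by the concrete subclasses below (e.g.
    # PosixDatastoreTestCase, InMemoryDatastoreTestCase) to match the
    # capabilities of the datastore under test.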

    def testConfigRoot(self) -> None:
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self) -> None:
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self) -> None:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # Make sure that calling getManyURIs without prediction before
            # the dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that calling getManyURIs with prediction before the
            # dataset has been put returns predicted URIs.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V", "band": "v"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)
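
    # For reference, the round trip exercised above, assuming a datastore and
    # a ref constructed as in testBasicPutGet:
    #
    #     datastore.put(metrics, ref)
    #     metrics_out = datastore.get(ref)
    #     uri = datastore.getURI(ref)
    #     data = datastore.get(ref.makeComponentRef("data"))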

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that registry knows nothing about."""
        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start the datastore in its default configuration of using
            # the registry.
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))
            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry so that from now on the datastore
            # must rely on trust.
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust, this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make a new dataset ref with a compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with the original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self) -> None:
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of test failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore, tuple[DatasetRef, ...]]:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)
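
        # Callers unpack the result, e.g. ``datastore, ref = self.prepDeleteTest()``
        # or ``datastore, *refs = self.prepDeleteTest(n_refs=10)``.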

        return datastore, *refs

    def testRemove(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)
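
    # Unlike remove() above, forget() only drops the datastore's internal
    # records; the artifact itself is left in place (checked at the end of
    # testForget via the predicted URI).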

    def testForget(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self) -> None:
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"}
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V", "band": "v"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V", "band": "v"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(
        self, func: Callable[[MetricsExample, str, DatasetRef], None], expectOutput: bool = True
    ) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False, but the new symlink will then fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)
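
    # The ingest tests below drive runIngestTest() with small callbacks: each
    # callback receives the in-memory object, the path of a freshly written
    # YAML file, and the DatasetRef under which to ingest it.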

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files outside of datastore root unless
                    auto.
                    """
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Ingest a file by transferring it to the template
                    location.
                    """
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop
                    # since it will get the same file name in the store
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self) -> None:
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works, include non-existing dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with a dict()
        constructor.
        """
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)
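
    # _assert_different_puts() stores the same object under two storage
    # classes ("<root>A" and "<root>B") and checks that reading either ref
    # returns equal values.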

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self) -> None:
        """Test that we write to a temporary file and then rename it."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self) -> None:
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location.
        """
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId)

        def raiser(ref: DatasetRef) -> None:
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)

    def test_roots(self) -> None:
        datastore = self.makeDatastore()

        self.assertEqual(set(datastore.names), set(datastore.roots.keys()))
        for root in datastore.roots.values():
            if root is not None:
                self.assertTrue(root.exists())


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self) -> None:
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
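
    # trash() is two-phase: it only marks artifacts for deletion; the files
    # are actually removed when emptyTrash() runs.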

    def testTrash(self) -> None:
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record; trash should then do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Test that the datastore cleans up on failure."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self) -> None:
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails and leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self) -> None:
        """Test constraints model. Assumes that each test class has the
        same constraints.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore and constraints
    at the ChainedDatastore.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self) -> None:
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # An ephemeral datastore means InMemory at the
                            # moment, and that does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self) -> None:
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)
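
    # The YAML snippets below follow the cache-manager configuration layout
    # used throughout these tests: a top-level ``cached`` section with an
    # optional ``root`` directory, a ``default`` caching policy,
    # per-dataset-type overrides under ``cacheable``, and expiry settings
    # under ``expiry``.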

    def testNoCacheDir(self) -> None:
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self) -> None:
        """Use default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self) -> None:
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # Environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # Now use a config with no cache directory set.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        # With no config value this environment variable should now be used.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # Use the API to set the environment variable, making sure that the
        # variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))
1533 def testExplicitCacheDir(self) -> None:
1534 config_str = f"""
1535cached:
1536 root: '{self.root}'
1537 cacheable:
1538 metric0: true
1539 """
1540 cache_manager = self._make_cache_manager(config_str)
1542 # Look inside to check we do have a cache directory.
1543 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))
1545 self.assertCache(cache_manager)
1547 # Test that the cache directory is not marked temporary
1548 self.assertFalse(cache_manager.cache_directory.isTemporary)
1550 def assertCache(self, cache_manager: DatastoreCacheManager) -> None:
1551 self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
1552 self.assertFalse(cache_manager.should_be_cached(self.refs[1]))
1554 uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
1555 self.assertIsInstance(uri, ResourcePath)
1556 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))
1558 # Check presence in cache using ref and then using file extension.
1559 self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
1560 self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
1561 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
1562 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))
1564 # Cached file should no longer exist but uncached file should be
1565 # unaffected.
1566 self.assertFalse(self.files[0].exists())
1567 self.assertTrue(self.files[1].exists())
1569 # Should find this file and it should be within the cache directory.
1570 with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
1571 self.assertTrue(found.exists())
1572 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))
1574 # Should not be able to find these in cache
1575 with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
1576 self.assertIsNone(found)
1577 with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
1578 self.assertIsNone(found)
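    # The helper above pins down the move semantics: move_to_cache returns
    # a ResourcePath and removes the source file on success, or returns
    # None and leaves the source untouched when the ref is not cacheable.
    # A condensed sketch of a caller honoring that contract (illustrative
    # only, not code from the datastore itself):
    @staticmethod
    def _move_or_keep_sketch(
        cache_manager: DatastoreCacheManager, uri: ResourcePath, ref: DatasetRef
    ) -> ResourcePath:
        cached = cache_manager.move_to_cache(uri, ref)
        # On success the original no longer exists, so hand back the cached
        # copy; otherwise fall back to the original location.
        return cached if cached is not None else uri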
1580 def testNoCache(self) -> None:
1581 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
1582 for uri, ref in zip(self.files, self.refs):
1583 self.assertFalse(cache_manager.should_be_cached(ref))
1584 self.assertIsNone(cache_manager.move_to_cache(uri, ref))
1585 self.assertFalse(cache_manager.known_to_cache(ref))
1586 with cache_manager.find_in_cache(ref, ".txt") as found:
1587 self.assertIsNone(found, msg=f"{cache_manager}")
1589 def _expiration_config(self, mode: str, threshold: int) -> str:
1590 return f"""
1591cached:
1592 default: true
1593 expiry:
1594 mode: {mode}
1595 threshold: {threshold}
1596 cacheable:
1597 unused: true
1598 """
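    # For reference, the template above renders YAML that parses to a
    # nested mapping. A quick illustrative round trip (the mode and
    # threshold values here are arbitrary, not taken from any test):
    def _check_expiration_config_sketch(self) -> None:
        cfg = yaml.safe_load(self._expiration_config("files", 3))
        self.assertEqual(cfg["cached"]["expiry"], {"mode": "files", "threshold": 3})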
1600 def testCacheExpiryFiles(self) -> None:
1601 threshold = 2 # Keep at least 2 files.
1602 mode = "files"
1603 config_str = self._expiration_config(mode, threshold)
1605 cache_manager = self._make_cache_manager(config_str)
1607        # Check that an empty cache reports an arbitrary ref as unknown.
1608 self.assertFalse(cache_manager.known_to_cache(self.refs[0]))
1610 # Should end with datasets: 2, 3, 4
1611 self.assertExpiration(cache_manager, 5, threshold + 1)
1612 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1614 # Check that we will not expire a file that is actively in use.
1615 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1616 self.assertIsNotNone(found)
1618 # Trigger cache expiration that should remove the file
1619 # we just retrieved. Should now have: 3, 4, 5
1620 cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
1621 self.assertIsNotNone(cached)
1623 # Cache should still report the standard file count.
1624 self.assertEqual(cache_manager.file_count, threshold + 1)
1626 # Add additional entry to cache.
1627 # Should now have 4, 5, 6
1628 cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
1629 self.assertIsNotNone(cached)
1631 # Is the file still there?
1632 self.assertTrue(found.exists())
1634 # Can we read it?
1635 data = found.read()
1636 self.assertGreater(len(data), 0)
1638 # Outside context the file should no longer exist.
1639 self.assertFalse(found.exists())
1641 # File count should not have changed.
1642 self.assertEqual(cache_manager.file_count, threshold + 1)
1644        # Dataset 2 was in the exempt directory, but because hardlinks are
1645        # used, it was deleted from the main cache during the expiry above
1646        # and so should no longer be found.
1647 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1648 self.assertIsNone(found)
1650 # And the one stored after it is also gone.
1651 with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
1652 self.assertIsNone(found)
1654 # But dataset 4 is present.
1655 with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
1656 self.assertIsNotNone(found)
1658        # Adding a new dataset to the cache should trigger another expiry.
1659 cache_manager.move_to_cache(self.files[7], self.refs[7])
1661 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1662 self.assertIsNone(found)
1664 def testCacheExpiryDatasets(self) -> None:
1665 threshold = 2 # Keep 2 datasets.
1666 mode = "datasets"
1667 config_str = self._expiration_config(mode, threshold)
1669 cache_manager = self._make_cache_manager(config_str)
1670 self.assertExpiration(cache_manager, 5, threshold + 1)
1671 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1673 def testCacheExpiryDatasetsComposite(self) -> None:
1674 threshold = 2 # Keep 2 datasets.
1675 mode = "datasets"
1676 config_str = self._expiration_config(mode, threshold)
1678 cache_manager = self._make_cache_manager(config_str)
1680 n_datasets = 3
1681 for i in range(n_datasets):
1682 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
1683 cached = cache_manager.move_to_cache(component_file, component_ref)
1684 self.assertIsNotNone(cached)
1685 self.assertTrue(cache_manager.known_to_cache(component_ref))
1686 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
1687 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))
1689 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files
1691        # Write two new non-composite datasets; the file count should drop.
1692 self.assertExpiration(cache_manager, 2, 5)
1694 def testCacheExpirySize(self) -> None:
1695 threshold = 55 # Each file is 10 bytes
1696 mode = "size"
1697 config_str = self._expiration_config(mode, threshold)
1699 cache_manager = self._make_cache_manager(config_str)
1700 self.assertExpiration(cache_manager, 10, 6)
1701 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1703 def assertExpiration(
1704 self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int
1705 ) -> None:
1706 """Insert the datasets and then check the number retained."""
1707 for i in range(n_datasets):
1708 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1709 self.assertIsNotNone(cached)
1711 self.assertEqual(cache_manager.file_count, n_retained)
1713        # The oldest files should no longer be in the cache.
1714 for i in range(n_datasets):
1715 with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
1716 if i >= n_datasets - n_retained:
1717 self.assertIsInstance(found, ResourcePath)
1718 else:
1719 self.assertIsNone(found)
1721 def testCacheExpiryAge(self) -> None:
1722        threshold = 1  # Expire files older than 1 second.
1723 mode = "age"
1724 config_str = self._expiration_config(mode, threshold)
1726 cache_manager = self._make_cache_manager(config_str)
1727 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1729        # Insert 2 files, then sleep, then insert 4 more.
1730 for i in range(2):
1731 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1732 self.assertIsNotNone(cached)
1733 time.sleep(2.0)
1734 for j in range(4):
1735 i = 2 + j # Continue the counting
1736 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1737 self.assertIsNotNone(cached)
1739 # Only the files written after the sleep should exist.
1740 self.assertEqual(cache_manager.file_count, 4)
1741 with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
1742 self.assertIsNone(found)
1743 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1744 self.assertIsInstance(found, ResourcePath)
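# Taken together, the expiry tests above cover four policies: "files"
# (limit the number of cached files), "datasets" (count whole datasets
# rather than component files), "size" (limit total bytes) and "age"
# (drop entries older than a threshold in seconds). A condensed sketch of
# the threshold comparison common to all four (illustrative names, not
# the real expiry implementation, which also exempts files actively in
# use):
def _should_expire_sketch(mode: str, measured: float, threshold: float) -> bool:
    """Return True when ``measured`` (a file count, dataset count, byte
    total or age in seconds, depending on ``mode``) exceeds ``threshold``."""
    if mode not in ("files", "datasets", "size", "age"):
        raise ValueError(f"Unrecognized expiry mode: {mode}")
    return measured > threshold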
1747class DatasetRefURIsTestCase(unittest.TestCase):
1748 """Tests for DatasetRefURIs."""
1750 def testSequenceAccess(self) -> None:
1751 """Verify that DatasetRefURIs can be treated like a two-item tuple."""
1752 uris = DatasetRefURIs()
1754 self.assertEqual(len(uris), 2)
1755        self.assertIsNone(uris[0])
1756 self.assertEqual(uris[1], {})
1758 primaryURI = ResourcePath("1/2/3")
1759 componentURI = ResourcePath("a/b/c")
1761        # Affirm that DatasetRefURIs does not support MutableSequence item assignment.
1762 with self.assertRaises(TypeError):
1763 uris[0] = primaryURI
1764 with self.assertRaises(TypeError):
1765 uris[1] = {"foo": componentURI}
1767 # but DatasetRefURIs can be set by property name:
1768 uris.primaryURI = primaryURI
1769 uris.componentURIs = {"foo": componentURI}
1770 self.assertEqual(uris.primaryURI, primaryURI)
1771 self.assertEqual(uris[0], primaryURI)
1773 primary, components = uris
1774 self.assertEqual(primary, primaryURI)
1775 self.assertEqual(components, {"foo": componentURI})
1777 def testRepr(self) -> None:
1778 """Verify __repr__ output."""
1779 uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")})
1780 self.assertEqual(
1781 repr(uris),
1782 'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})',
1783 )
1786class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase):
1787 """Test the StoredFileInfo class."""
1789 storageClassFactory = StorageClassFactory()
1791 def test_StoredFileInfo(self) -> None:
1792 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
1793 ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})
1795 record = dict(
1796 storage_class="StructuredDataDict",
1797 formatter="lsst.daf.butler.Formatter",
1798 path="a/b/c.txt",
1799 component="component",
1800 dataset_id=ref.id,
1801 checksum=None,
1802 file_size=5,
1803 )
1804 info = StoredFileInfo.from_record(record)
1806 self.assertEqual(info.dataset_id, ref.id)
1807 self.assertEqual(info.to_record(), record)
1809 ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})
1810 rebased = info.rebase(ref2)
1811 self.assertEqual(rebased.dataset_id, ref2.id)
1812 self.assertEqual(rebased.rebase(ref), info)
1814 with self.assertRaises(TypeError):
1815 rebased.update(formatter=42)
1817 with self.assertRaises(ValueError):
1818 rebased.update(something=42, new="42")
1820 # Check that pickle works on StoredFileInfo.
1821 pickled_info = pickle.dumps(info)
1822 unpickled_info = pickle.loads(pickled_info)
1823 self.assertEqual(unpickled_info, info)
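# The round trip above is the serialization contract used when file
# metadata is persisted: to_record() flattens a StoredFileInfo to a plain
# dict and from_record() restores an equal object. A standalone sketch of
# that contract (mirroring the test; not itself part of the test suite):
def _record_roundtrip_sketch(info: StoredFileInfo) -> bool:
    """Return True if ``info`` survives a to_record/from_record round trip."""
    return StoredFileInfo.from_record(info.to_record()) == info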
1826if __name__ == "__main__":
1827 unittest.main()