# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable
from typing import Any, cast

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    StorageClass,
    StorageClassFactory,
    StoredFileInfo,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    """Make an example dataset that can be stored in a butler."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
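
# Note: the three positional arguments above are assumed to map to the
# ``summary``, ``output`` and ``data`` attributes exercised by the component
# tests below.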


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing."""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage classes are fixed for all datastores in these tests.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself).
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False

    def testConfigRoot(self) -> None:
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self) -> None:
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
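        # ``extract`` expands these dimension names into the dimension graph
        # used to build data IDs; the IDs below therefore also carry related
        # dimensions such as ``instrument`` and ``band``.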
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that parameters are validated."""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self) -> None:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with the parent ID.
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None.
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V", "band": "v"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported.
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise.
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # Non-existing file.
            datastore.get(ref)

        # Get a URI from it.
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that registry knows nothing about."""
        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined.
        if not hasattr(datastore, "trustGetRequest"):
            return
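
        # ``trustGetRequest`` (a file-datastore feature) lets ``get`` and
        # ``exists`` fall back to looking for the expected artifacts on disk
        # even when the datastore's internal records are missing.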

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(
            ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"), start=1
        ):
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            disassembled = sc_name == "StructuredComposite"

            # Start datastore in default configuration of using registry.
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly.
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s).
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry so now we are trusting.
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break.
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction.
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode.
            datastore.trustGetRequest = True

            # Try again to get it.
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component.
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust, this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make a new dataset ref with a compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with the original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))
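
                # Reset the method so later iterations start again from the
                # default registry-backed behaviour.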
                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self) -> None:
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # The in-memory datastore does not disassemble.
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble, to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore.
        datastore = self.makeDatastore()

        # Dummy dataId.
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of test failures because of file clashes.
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with a read parameter.
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component.
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component.
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore | DatasetRef, ...]:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)
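
        # Return a flat tuple so callers can unpack either
        # ``datastore, ref = ...`` or ``datastore, *refs = ...``.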
        return datastore, *refs

    def testRemove(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail.
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once.
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail.
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op.
        datastore.forget([ref])

        # The predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self) -> None:
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"}
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist.
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not.
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist.
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist.
        for ref, _ in fail:
            # These should raise.
            with self.assertRaises(FileNotFoundError):
                # Non-existing file.
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V", "band": "v"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V", "band": "v"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist.
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist.
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone.
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(
        self, func: Callable[[MetricsExample, str, DatasetRef], None], expectOutput: bool = True
    ) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False, but creating the new symlink will then fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
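            # Hand the on-disk YAML file to the test-specific ingest callable.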
            func(metrics, path, ref)

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have "auto" but cannot do an in-place transfer.
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Ingest a file already in the datastore root."""
                    # First copy it into the root, and adjust the path
                    # accordingly.
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files outside of the datastore root unless
                    mode is "auto".
                    """
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(
                                FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode
                            )
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Ingest a file by transferring it to the template
                    location.
                    """
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # the datastore for "auto" mode.
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of
                    # mode.
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next loop iteration, since it
                    # will get the same file name in the store.
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for datastore_name in datastore.names:
            if not datastore_name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self) -> None:
        """Test the export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete
            # set.
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works; include a non-existing
                # dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import relative paths.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None.
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict
        on read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with a dict()
        constructor.
        """
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
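        # The "A" and "B" suffixes select two storage class variants for the
        # same Python type (presumably configured with different formatters in
        # the test storage class config), so the same object is stored twice
        # and must read back identically from both.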
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self) -> None:
        """Test that we write to a temporary file and then rename it."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])
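
        # The move is the rename step of the atomic write, so the DEBUG log
        # line is expected to mention both the temporary source and the final
        # destination.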
        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self) -> None:
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location.
        """
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations.
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId)

        def raiser(ref: DatasetRef) -> None:
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # Verify the non-composite ref execution path.
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # Verify the composite-ref execution path.
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self) -> None:
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Configuration should have disabled checksum calculation.
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove the dataset and put it back with checksums explicitly
        # enabled.
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash tests to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self) -> None:
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet.
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record; trash should then do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiple refs at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed.


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Test that the datastore cleans up on failure."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self) -> None:
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension).
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails and a formatter that fails but leaves
        # a file behind.
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter.
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset; it should fail.
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk.
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory.
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written.
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self) -> None:
        """Test the constraints model. Assumes that each test class has the
        same constraints.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose a different temp file depending on the StorageClass.
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest.
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest.
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore and constraints
    at the ChainedDatastore level.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self) -> None:
        """Test the chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose a different temp file depending on the StorageClass.
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore.
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works.
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore.
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores mean InMemory at the
                            # moment, and those do not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest.
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for the datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self) -> None:
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files.
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        # Create a list of refs and a list of temporary files.
        n_datasets = 10
        self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create the test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
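        """Construct a cache manager from an inline YAML configuration
        string.
        """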
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self) -> None:
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check that we don't have a cache directory.
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary.
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self) -> None:
        """Use the default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self) -> None:
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # The environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # Now a config with no cache directory set.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        # This environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set, the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # Use the API to set the environment variable, making sure that the
        # variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))
1525 def testExplicitCacheDir(self) -> None:
1526 config_str = f"""
1527cached:
1528 root: '{self.root}'
1529 cacheable:
1530 metric0: true
1531 """
1532 cache_manager = self._make_cache_manager(config_str)
1534 # Look inside to check we do have a cache directory.
1535 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))
1537 self.assertCache(cache_manager)
1539 # Test that the cache directory is not marked temporary
1540 self.assertFalse(cache_manager.cache_directory.isTemporary)
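    # Contrast with testNoCacheDir above: an explicit root is used as-is
    # and is not flagged temporary, whereas a null root yields a temporary
    # cache directory.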
1542 def assertCache(self, cache_manager: DatastoreCacheManager) -> None:
1543 self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
1544 self.assertFalse(cache_manager.should_be_cached(self.refs[1]))
1546 uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
1547 self.assertIsInstance(uri, ResourcePath)
1548 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))
1550 # Check presence in cache using ref and then using file extension.
1551 self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
1552 self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
1553 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
1554 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))
1556 # Cached file should no longer exist but uncached file should be
1557 # unaffected.
1558 self.assertFalse(self.files[0].exists())
1559 self.assertTrue(self.files[1].exists())
1561 # Should find this file and it should be within the cache directory.
1562 with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
1563 self.assertTrue(found.exists())
1564 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))
1566 # Should not be able to find these in cache
1567 with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
1568 self.assertIsNone(found)
1569 with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
1570 self.assertIsNone(found)
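    # The lifecycle checked above: should_be_cached() consults the
    # "cacheable" configuration, move_to_cache() transfers (not copies) the
    # file into the cache and returns its new URI, known_to_cache() is a
    # cheap membership test, and find_in_cache() is a context manager whose
    # returned file is only guaranteed to exist inside the context.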
1572 def testNoCache(self) -> None:
1573 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
1574 for uri, ref in zip(self.files, self.refs):
1575 self.assertFalse(cache_manager.should_be_cached(ref))
1576 self.assertIsNone(cache_manager.move_to_cache(uri, ref))
1577 self.assertFalse(cache_manager.known_to_cache(ref))
1578 with cache_manager.find_in_cache(ref, ".txt") as found:
1579 self.assertIsNone(found, msg=f"{cache_manager}")
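    # DatastoreDisabledCacheManager is a drop-in no-op: every probe above
    # returns False or None, so calling code needs no special-casing.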
1581 def _expiration_config(self, mode: str, threshold: int) -> str:
1582 return f"""
1583cached:
1584 default: true
1585 expiry:
1586 mode: {mode}
1587 threshold: {threshold}
1588 cacheable:
1589 unused: true
1590 """
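    # For example, self._expiration_config("files", 2) renders to YAML
    # equivalent to:
    #
    #   cached:
    #     default: true
    #     expiry:
    #       mode: files
    #       threshold: 2
    #     cacheable:
    #       unused: true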
1592 def testCacheExpiryFiles(self) -> None:
1593 threshold = 2 # Keep at least 2 files.
1594 mode = "files"
1595 config_str = self._expiration_config(mode, threshold)
1597 cache_manager = self._make_cache_manager(config_str)
1599        # Check that an empty cache returns unknown for an arbitrary ref.
1600 self.assertFalse(cache_manager.known_to_cache(self.refs[0]))
1602 # Should end with datasets: 2, 3, 4
1603 self.assertExpiration(cache_manager, 5, threshold + 1)
1604 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1606 # Check that we will not expire a file that is actively in use.
1607 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1608 self.assertIsNotNone(found)
1610 # Trigger cache expiration that should remove the file
1611 # we just retrieved. Should now have: 3, 4, 5
1612 cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
1613 self.assertIsNotNone(cached)
1615        # Cache should still report the expected file count.
1616 self.assertEqual(cache_manager.file_count, threshold + 1)
1618 # Add additional entry to cache.
1619 # Should now have 4, 5, 6
1620 cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
1621 self.assertIsNotNone(cached)
1623 # Is the file still there?
1624 self.assertTrue(found.exists())
1626 # Can we read it?
1627 data = found.read()
1628 self.assertGreater(len(data), 0)
1630 # Outside context the file should no longer exist.
1631 self.assertFalse(found.exists())
1633 # File count should not have changed.
1634 self.assertEqual(cache_manager.file_count, threshold + 1)
1636        # Dataset 2 was in the exempt directory, but because hardlinks
1637        # are used it was removed from the main cache during the expiry
1638        # above, so it should no longer be found.
1639 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1640 self.assertIsNone(found)
1642 # And the one stored after it is also gone.
1643 with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
1644 self.assertIsNone(found)
1646 # But dataset 4 is present.
1647 with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
1648 self.assertIsNotNone(found)
1650 # Adding a new dataset to the cache should now delete it.
1651 cache_manager.move_to_cache(self.files[7], self.refs[7])
1653 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1654 self.assertIsNone(found)
1656 def testCacheExpiryDatasets(self) -> None:
1657 threshold = 2 # Keep 2 datasets.
1658 mode = "datasets"
1659 config_str = self._expiration_config(mode, threshold)
1661 cache_manager = self._make_cache_manager(config_str)
1662 self.assertExpiration(cache_manager, 5, threshold + 1)
1663 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1665 def testCacheExpiryDatasetsComposite(self) -> None:
1666 threshold = 2 # Keep 2 datasets.
1667 mode = "datasets"
1668 config_str = self._expiration_config(mode, threshold)
1670 cache_manager = self._make_cache_manager(config_str)
1672 n_datasets = 3
1673 for i in range(n_datasets):
1674 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
1675 cached = cache_manager.move_to_cache(component_file, component_ref)
1676 self.assertIsNotNone(cached)
1677 self.assertTrue(cache_manager.known_to_cache(component_ref))
1678 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
1679 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))
1681 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files
1683        # Write two new non-composite datasets and the number of files should drop.
1684 self.assertExpiration(cache_manager, 2, 5)
1686 def testCacheExpirySize(self) -> None:
1687 threshold = 55 # Each file is 10 bytes
1688 mode = "size"
1689 config_str = self._expiration_config(mode, threshold)
1691 cache_manager = self._make_cache_manager(config_str)
1692 self.assertExpiration(cache_manager, 10, 6)
1693 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1695 def assertExpiration(
1696 self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int
1697 ) -> None:
1698 """Insert the datasets and then check the number retained."""
1699 for i in range(n_datasets):
1700 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1701 self.assertIsNotNone(cached)
1703 self.assertEqual(cache_manager.file_count, n_retained)
1705        # The oldest files should no longer be in the cache.
1706 for i in range(n_datasets):
1707 with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
1708 if i >= n_datasets - n_retained:
1709 self.assertIsInstance(found, ResourcePath)
1710 else:
1711 self.assertIsNone(found)
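    # Worked example: with n_datasets=5 and n_retained=3 (as in
    # testCacheExpiryFiles), refs 0 and 1 have been expired while refs 2,
    # 3 and 4 are still found in the cache.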
1713 def testCacheExpiryAge(self) -> None:
1714        threshold = 1  # Expire files older than 1 second.
1715 mode = "age"
1716 config_str = self._expiration_config(mode, threshold)
1718 cache_manager = self._make_cache_manager(config_str)
1719 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1721        # Insert 2 files, then sleep, then insert 4 more.
1722 for i in range(2):
1723 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1724 self.assertIsNotNone(cached)
1725 time.sleep(2.0)
1726        # Continue the counting from the files inserted above.
1727        for i in range(2, 6):
1728 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1729 self.assertIsNotNone(cached)
1731 # Only the files written after the sleep should exist.
1732 self.assertEqual(cache_manager.file_count, 4)
1733 with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
1734 self.assertIsNone(found)
1735 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1736 self.assertIsInstance(found, ResourcePath)
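
# A minimal sketch of the cache-manager API exercised by the tests above.
# The DatastoreCacheManager constructor signature is an assumption here,
# mirroring the DatastoreDisabledCacheManager call in testNoCache; the
# methods used are exactly those demonstrated in this file.
def _cache_usage_sketch(
    config: DatastoreCacheManagerConfig,
    universe: DimensionUniverse,
    file: ResourcePath,
    ref: DatasetRef,
) -> None:
    # Assumed constructor: a cache config plus the dimension universe.
    cache_manager = DatastoreCacheManager(config, universe=universe)
    if cache_manager.should_be_cached(ref):
        # move_to_cache() transfers the file into the cache and returns the
        # cached URI, or None if this dataset type is not cacheable.
        cached = cache_manager.move_to_cache(file, ref)
        assert cached is not None
    # find_in_cache() is a context manager: the returned file may be
    # removed once the context exits, so read it inside the block.
    with cache_manager.find_in_cache(ref, file.getExtension()) as found:
        if found is not None:
            found.read()
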
1739class DatasetRefURIsTestCase(unittest.TestCase):
1740 """Tests for DatasetRefURIs."""
1742 def testSequenceAccess(self) -> None:
1743 """Verify that DatasetRefURIs can be treated like a two-item tuple."""
1744 uris = DatasetRefURIs()
1746 self.assertEqual(len(uris), 2)
1747        self.assertIsNone(uris[0])
1748 self.assertEqual(uris[1], {})
1750 primaryURI = ResourcePath("1/2/3")
1751 componentURI = ResourcePath("a/b/c")
1753        # Affirm that DatasetRefURIs does not support MutableSequence item assignment.
1754 with self.assertRaises(TypeError):
1755 uris[0] = primaryURI
1756 with self.assertRaises(TypeError):
1757 uris[1] = {"foo": componentURI}
1759        # But the URIs can be set by property name:
1760 uris.primaryURI = primaryURI
1761 uris.componentURIs = {"foo": componentURI}
1762 self.assertEqual(uris.primaryURI, primaryURI)
1763 self.assertEqual(uris[0], primaryURI)
1765 primary, components = uris
1766 self.assertEqual(primary, primaryURI)
1767 self.assertEqual(components, {"foo": componentURI})
1769 def testRepr(self) -> None:
1770 """Verify __repr__ output."""
1771 uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")})
1772 self.assertEqual(
1773 repr(uris),
1774 'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})',
1775 )
1778class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase):
1779 """Test the StoredFileInfo class."""
1781 storageClassFactory = StorageClassFactory()
1783 def test_StoredFileInfo(self) -> None:
1784 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
1785 ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})
1787 record = dict(
1788 storage_class="StructuredDataDict",
1789 formatter="lsst.daf.butler.Formatter",
1790 path="a/b/c.txt",
1791 component="component",
1792 dataset_id=ref.id,
1793 checksum=None,
1794 file_size=5,
1795 )
1796 info = StoredFileInfo.from_record(record)
1798 self.assertEqual(info.dataset_id, ref.id)
1799 self.assertEqual(info.to_record(), record)
1801 ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})
1802 rebased = info.rebase(ref2)
1803 self.assertEqual(rebased.dataset_id, ref2.id)
1804 self.assertEqual(rebased.rebase(ref), info)
1806 with self.assertRaises(TypeError):
1807 rebased.update(formatter=42)
1809 with self.assertRaises(ValueError):
1810 rebased.update(something=42, new="42")
1812 # Check that pickle works on StoredFileInfo.
1813 pickled_info = pickle.dumps(info)
1814 unpickled_info = pickle.loads(pickled_info)
1815 self.assertEqual(unpickled_info, info)
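
# StoredFileInfo round-trips through plain dict records; the keys below are
# exactly those exercised in test_StoredFileInfo, with illustrative values.
def _stored_file_info_sketch(ref: DatasetRef) -> StoredFileInfo:
    record = dict(
        storage_class="StructuredDataDict",
        formatter="lsst.daf.butler.Formatter",
        path="a/b/c.txt",
        component="component",
        dataset_id=ref.id,
        checksum=None,
        file_size=5,
    )
    info = StoredFileInfo.from_record(record)
    # to_record() must reproduce the input record exactly.
    assert info.to_record() == record
    return info
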
1818if __name__ == "__main__":
1819 unittest.main()