# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import contextlib
import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable, Iterator
from typing import Any, cast

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DimensionUniverse,
    FileDataset,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.datastore import DatasetRefURIs, DatastoreConfig, DatastoreValidationError, NullDatastore
from lsst.daf.butler.datastore.cache_manager import (
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreDisabledCacheManager,
)
from lsst.daf.butler.datastore.stored_file_info import StoredFileInfo
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    """Make example dataset that can be stored in butler."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
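
# MetricsExample exposes ``summary``, ``output`` and ``data`` attributes; the
# composite storage classes exercised below map their components onto those
# attributes.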


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing"""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
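
# The concrete test cases further down (POSIX, in-memory, chained) customize
# the shared tests below through class attributes such as ``configFile``,
# ``uriScheme``, ``ingestTransferModes`` and ``validationCanFail`` rather than
# by overriding the test methods themselves.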


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False

    def testConfigRoot(self) -> None:
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self) -> None:
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {
            "instrument": "dummy",
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {
            "instrument": "dummy",
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self) -> None:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {
            "instrument": "dummy",
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }
        dataId2 = {
            "instrument": "dummy",
            "visit": 53,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {
            "instrument": "dummy",
            "visit": 54,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that registry knows nothing about."""
        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start the datastore in its default configuration, consulting
            # the registry (trust mode disabled).
            datastore.trustGetRequest = False

            # Select the storage class for this iteration, with or without
            # disassembly.
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.conform(("visit", "physical_filter"))

            dataId = {
                "instrument": "dummy",
                "visit": 52 + i,
                "physical_filter": "V",
                "band": "v",
                "day_obs": 20250101,
            }

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete registry entry so now we are trusting
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make new dataset ref with compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate stored dataset type.
                def _stored_dataset_type(name: str, ref: DatasetRef = ref) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self) -> None:
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of tests because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore, *tuple[DatasetRef, ...]]:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.conform(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {
                "instrument": "dummy",
                "visit": 638 + i,
                "physical_filter": "U",
                "band": "u",
                "day_obs": 20250101,
            }
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)
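
        # Return a flat tuple so callers can unpack either
        # ``datastore, ref = self.prepDeleteTest()`` or
        # ``datastore, *refs = self.prepDeleteTest(n_refs=10)``.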
        return datastore, *refs

    def testRemove(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self) -> None:
        metrics = makeExampleMetrics()

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {
            "instrument": "dummy",
            "visit": 2048,
            "physical_filter": "Uprime",
            "band": "u",
            "day_obs": 20250101,
        }

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v", "day_obs": 20250101}
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v", "day_obs": 20250101}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {
                    "instrument": "dummy",
                    "visit": 1,
                    "physical_filter": "V",
                    "band": "v",
                    "day_obs": 20250101,
                }
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {
                        "instrument": "dummy",
                        "visit": 2,
                        "physical_filter": "V",
                        "band": "v",
                        "day_obs": 20250101,
                    }
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v", "day_obs": 20250101}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(self, func: Callable[[MetricsExample, str, DatasetRef], None]) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This makes os.path.exists
        # return False, but the new symlink will then fail with
        # FileExistsError later in the code, so the test still passes.
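        # ``_temp_yaml_file`` is a helper defined later in this module; it is
        # expected to write the mapping to a temporary YAML file and yield the
        # file's path as a context manager.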
        with _temp_yaml_file(metrics._asdict()) as path:
            func(metrics, path, ref)

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have "auto" but can't do in-place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()
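
                # Bind the current loop values as default arguments so each
                # nested helper captures this iteration's ``mode`` and
                # ``datastore`` (closures are late-binding, so a plain
                # reference would see the final loop values instead).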
                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files outside of datastore root unless
                    auto.
                    """
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file by transferring it to the template
                    location.
                    """
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))
                    file_exists = os.path.exists(path)
                    if mode == "move":
                        self.assertFalse(file_exists)
                    else:
                        self.assertTrue(file_exists)

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with _temp_yaml_file(metrics._asdict()) as realpath:
                with tempfile.TemporaryDirectory() as tmpdir:
                    sympath = os.path.join(tmpdir, "symlink.yaml")
                    os.symlink(os.path.realpath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertTrue(os.path.samefile(linkTarget, realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # A ChainedDatastore that only includes InMemoryDatastores has to be
        # skipped as well.
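        # ``for``/``else``: the ``else`` branch runs only if the loop never
        # hits ``break``, i.e. every child datastore is an in-memory one.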
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.conform(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {
                "instrument": "dummy",
                "visit": visit,
                "physical_filter": "Uprime",
                "band": "u",
                "day_obs": 20250101,
            }
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self) -> None:
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works, include non-existing dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {
            "instrument": "dummy",
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.make_empty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with dict()
        constructor.
        """
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
        """Put the same object with two related storage classes and check
        that the two reads agree.
        """
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.make_empty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self) -> None:
        # The call to os.path.realpath is necessary because Mac temporary files
        # can end up in either /private/var/folders or /var/folders, which
        # refer to the same location but don't appear to.
        # This matters for "relsymlink" transfer mode, because it needs to be
        # able to read the file through a relative symlink, but some of the
        # intermediate directories are not traversable if you try to get from a
        # tempfile in /var/folders to one in /private/var/folders via a
        # relative path.
        self.root = os.path.realpath(self.enterContext(tempfile.TemporaryDirectory()))
        super().setUp()

    def testAtomicWrite(self) -> None:
        """Test that we write to a temporary and then rename"""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v", "day_obs": 20250101}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self) -> None:
        """Verify that the expected exception is raised if the FileDatastore
        can not determine the put formatter location.
        """
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create storage classes for testing, including a composite.
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {
            "instrument": "dummy",
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId)

        def raiser(ref: DatasetRef) -> None:
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)

    def test_roots(self):
        datastore = self.makeDatastore()

        self.assertEqual(set(datastore.names), set(datastore.roots.keys()))
        for root in datastore.roots.values():
            if root is not None:
                self.assertTrue(root.exists())

    def test_prepare_get_for_external_client(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        # Most of the coverage for this function is in test_server.py,
        # because it requires a file backend that supports URL signing.
        self.assertIsNone(datastore.prepare_get_for_external_client(ref))


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self) -> None:
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v", "day_obs": 20250101}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove, then put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)

    def test_repeat_ingest(self):
        """Test that repeatedly ingesting the same file in direct mode
        is allowed.

        Test can only run with FileDatastore since that is the only one
        supporting "direct" ingest.
        """
        metrics, v4ref = self._prepareIngestTest()
        datastore = self.makeDatastore()
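        # DATAID_TYPE_RUN derives a deterministic (UUIDv5) dataset ID from the
        # dataset type, data ID and run, so re-ingesting the same file with an
        # equivalent ref is recognized as a repeat rather than a clash.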
        v5ref = DatasetRef(
            v4ref.datasetType, v4ref.dataId, v4ref.run, id_generation_mode=DatasetIdGenEnum.DATAID_TYPE_RUN
        )

        with _temp_yaml_file(metrics._asdict()) as path:
            datastore.ingest(FileDataset(path=path, refs=v4ref), transfer="direct")

            # This will fail because the ref is using UUIDv4.
            with self.assertRaises(RuntimeError):
                datastore.ingest(FileDataset(path=path, refs=v4ref), transfer="direct")

            # UUIDv5 can be repeatedly ingested in direct mode.
            datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="direct")
            datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="direct")

            with self.assertRaises(RuntimeError):
                datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="copy")


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self) -> None:
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record; trash should then do nothing. This mimics the
        # execution-butler scenario, where datastore records are gone but
        # the files remain.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can trash


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Test datastore cleans up on failure."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp()
        super().setUp()

    def testCleanup(self) -> None:
        """Test that a failed formatter write does cleanup a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {
            "instrument": "dummy",
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails but leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(RuntimeError):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self) -> None:
        """Test constraints model. Assumes that each test class has the
        same constraints.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.conform(("visit", "physical_filter", "instrument"))
        dataId = {
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "instrument": "DummyCamComp",
            "day_obs": 20250101,
        }

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp()
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp()
        super().setUp()

    def testConstraints(self) -> None:
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.conform(("visit", "physical_filter", "instrument"))
        dataId1 = {
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "instrument": "DummyCamComp",
            "day_obs": 20250101,
        }
        dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC", "day_obs": 20250101}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
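
        # Each ``accept`` tuple below lists, per child datastore in chain
        # order, whether the dataset is expected to land in that datastore;
        # see the zip over ``datastore.datastores`` further down.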
        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                            # Ephemeral datastores means InMemory at the moment
                            # and that does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self) -> None:
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp()

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {
            "instrument": "dummy",
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)
1619 def tearDown(self) -> None:
1620 if self.root is not None and os.path.exists(self.root):
1621 shutil.rmtree(self.root, ignore_errors=True)
1623 def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
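# Build a cache manager from an inline YAML string; each test below
# supplies its own configuration.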
1624 config = Config.fromYaml(config_str)
1625 return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)
1627 def testNoCacheDir(self) -> None:
1628 config_str = """
1629cached:
1630 root: null
1631 cacheable:
1632 metric0: true
1633 """
1634 cache_manager = self._make_cache_manager(config_str)
1636 # Look inside to check we don't have a cache directory
1637 self.assertIsNone(cache_manager._cache_directory)
1639 self.assertCache(cache_manager)
1641 # Test that the cache directory is marked temporary
1642 self.assertTrue(cache_manager.cache_directory.isTemporary)
1644 def testNoCacheDirReversed(self) -> None:
1645 """Use default caching status and metric1 to false"""
1646 config_str = """
1647cached:
1648 root: null
1649 default: true
1650 cacheable:
1651 metric1: false
1652 """
1653 cache_manager = self._make_cache_manager(config_str)
1655 self.assertCache(cache_manager)
1657 def testEnvvarCacheDir(self) -> None:
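# Precedence exercised below, highest first: the DAF_BUTLER_CACHE_DIRECTORY
# environment variable, then an explicit config root, and only when no root
# is set at all, DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET or the fallback API.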
1658 config_str = f"""
1659cached:
1660 root: '{self.root}'
1661 cacheable:
1662 metric0: true
1663 """
1665 root = ResourcePath(self.root, forceDirectory=True)
1666 env_dir = root.join("somewhere", forceDirectory=True)
1667 elsewhere = root.join("elsewhere", forceDirectory=True)
1669 # Environment variable should override the config value.
1670 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
1671 cache_manager = self._make_cache_manager(config_str)
1672 self.assertEqual(cache_manager.cache_directory, env_dir)
1674 # This environment variable should not override the config value.
1675 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
1676 cache_manager = self._make_cache_manager(config_str)
1677 self.assertEqual(cache_manager.cache_directory, root)
1679 # Now a config with no cache root set.
1680 config_str = """
1681cached:
1682 root: null
1683 default: true
1684 cacheable:
1685 metric1: false
1686 """
1687 cache_manager = self._make_cache_manager(config_str)
1689 # This environment variable should now take effect since no root is set.
1690 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
1691 cache_manager = self._make_cache_manager(config_str)
1692 self.assertEqual(cache_manager.cache_directory, env_dir)
1694 # If both environment variables are set the main (not IF_UNSET)
1695 # variable should win.
1696 with unittest.mock.patch.dict(
1697 os.environ,
1698 {
1699 "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
1700 "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
1701 },
1702 ):
1703 cache_manager = self._make_cache_manager(config_str)
1704 self.assertEqual(cache_manager.cache_directory, env_dir)
1706 # Use the API to set the environment variable, making sure that the
1707 # variable is reset on exit.
1708 with unittest.mock.patch.dict(
1709 os.environ,
1710 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
1711 ):
1712 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
1713 self.assertTrue(defined)
1714 cache_manager = self._make_cache_manager(config_str)
1715 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))
1717 # Now create the cache manager ahead of time and set the fallback
1718 # later.
1719 cache_manager = self._make_cache_manager(config_str)
1720 self.assertIsNone(cache_manager._cache_directory)
1721 with unittest.mock.patch.dict(
1722 os.environ,
1723 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
1724 ):
1725 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
1726 self.assertTrue(defined)
1727 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))
1729 def testExplicitCacheDir(self) -> None:
1730 config_str = f"""
1731cached:
1732 root: '{self.root}'
1733 cacheable:
1734 metric0: true
1735 """
1736 cache_manager = self._make_cache_manager(config_str)
1738 # Look inside to check we do have a cache directory.
1739 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))
1741 self.assertCache(cache_manager)
1743 # Test that the cache directory is not marked temporary
1744 self.assertFalse(cache_manager.cache_directory.isTemporary)
1746 def assertCache(self, cache_manager: DatastoreCacheManager) -> None:
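# Common assertions: under each of the configs above, refs[0] ("metric0")
# is cacheable and refs[1] ("metric1") is not, whether that is stated
# explicitly or implied by the default.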
1747 self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
1748 self.assertFalse(cache_manager.should_be_cached(self.refs[1]))
1750 uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
1751 self.assertIsInstance(uri, ResourcePath)
1752 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))
1754 # Check presence in cache using ref and then using file extension.
1755 self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
1756 self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
1757 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
1758 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))
1760 # Cached file should no longer exist but uncached file should be
1761 # unaffected.
1762 self.assertFalse(self.files[0].exists())
1763 self.assertTrue(self.files[1].exists())
1765 # Should find this file and it should be within the cache directory.
1766 with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
1767 self.assertTrue(found.exists())
1768 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))
1770 # Should not be able to find these in cache
1771 with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
1772 self.assertIsNone(found)
1773 with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
1774 self.assertIsNone(found)
1776 def testNoCache(self) -> None:
1777 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
1778 for uri, ref in zip(self.files, self.refs, strict=True):
1779 self.assertFalse(cache_manager.should_be_cached(ref))
1780 self.assertIsNone(cache_manager.move_to_cache(uri, ref))
1781 self.assertFalse(cache_manager.known_to_cache(ref))
1782 with cache_manager.find_in_cache(ref, ".txt") as found:
1783 self.assertIsNone(found, msg=f"{cache_manager}")
1785 def _expiration_config(self, mode: str, threshold: int) -> str:
1786 return f"""
1787cached:
1788 default: true
1789 expiry:
1790 mode: {mode}
1791 threshold: {threshold}
1792 cacheable:
1793 unused: true
1794 """
1796 def testCacheExpiryFiles(self) -> None:
1797 threshold = 2 # Keep at least 2 files.
1798 mode = "files"
1799 config_str = self._expiration_config(mode, threshold)
1801 cache_manager = self._make_cache_manager(config_str)
1803 # Check that an empty cache returns unknown for arbitrary ref
1804 self.assertFalse(cache_manager.known_to_cache(self.refs[0]))
1806 # Should end with datasets: 2, 3, 4
1807 self.assertExpiration(cache_manager, 5, threshold + 1)
1808 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1810 # Check that we will not expire a file that is actively in use.
1811 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1812 self.assertIsNotNone(found)
1814 # Trigger cache expiration that should remove the file
1815 # we just retrieved. Should now have: 3, 4, 5
1816 cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
1817 self.assertIsNotNone(cached)
1819 # Cache should still report the standard file count.
1820 self.assertEqual(cache_manager.file_count, threshold + 1)
1822 # Add additional entry to cache.
1823 # Should now have 4, 5, 6
1824 cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
1825 self.assertIsNotNone(cached)
1827 # Is the file still there?
1828 self.assertTrue(found.exists())
1830 # Can we read it?
1831 data = found.read()
1832 self.assertGreater(len(data), 0)
1834 # Outside context the file should no longer exist.
1835 self.assertFalse(found.exists())
1837 # File count should not have changed.
1838 self.assertEqual(cache_manager.file_count, threshold + 1)
1840 # Dataset 2 was in the exempt directory, but because hardlinks
1841 # are used it was deleted from the main cache during the cache
1842 # expiry above, and so should no longer be found.
1843 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1844 self.assertIsNone(found)
1846 # And the one stored after it is also gone.
1847 with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
1848 self.assertIsNone(found)
1850 # But dataset 4 is present.
1851 with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
1852 self.assertIsNotNone(found)
1854 # Adding a new dataset to the cache should now delete it.
1855 cache_manager.move_to_cache(self.files[7], self.refs[7])
1857 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1858 self.assertIsNone(found)
1860 def testCacheExpiryDatasets(self) -> None:
1861 threshold = 2 # Keep at least 2 datasets.
1862 mode = "datasets"
1863 config_str = self._expiration_config(mode, threshold)
1865 cache_manager = self._make_cache_manager(config_str)
1866 self.assertExpiration(cache_manager, 5, threshold + 1)
1867 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1869 def testCacheExpiryDatasetsComposite(self) -> None:
1870 threshold = 2 # Keep at least 2 datasets.
1871 mode = "datasets"
1872 config_str = self._expiration_config(mode, threshold)
1874 cache_manager = self._make_cache_manager(config_str)
1876 n_datasets = 3
1877 for i in range(n_datasets):
1878 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i], strict=True):
1879 cached = cache_manager.move_to_cache(component_file, component_ref)
1880 self.assertIsNotNone(cached)
1881 self.assertTrue(cache_manager.known_to_cache(component_ref))
1882 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
1883 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))
1885 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files
1887 # Write two new non-composite datasets and the number of files should drop.
1888 self.assertExpiration(cache_manager, 2, 5)
1890 def testCacheExpirySize(self) -> None:
1891 threshold = 55 # Each file is 10 bytes
1892 mode = "size"
1893 config_str = self._expiration_config(mode, threshold)
1895 cache_manager = self._make_cache_manager(config_str)
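# The arithmetic behind the expectation: ten 10-byte files go in and six
# are retained (60 bytes), the smallest count at or above the 55-byte
# threshold, suggesting that expiry keeps the newest files whose combined
# size still meets the threshold.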
1896 self.assertExpiration(cache_manager, 10, 6)
1897 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1899 def assertExpiration(
1900 self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int
1901 ) -> None:
1902 """Insert the datasets and then check the number retained."""
1903 for i in range(n_datasets):
1904 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1905 self.assertIsNotNone(cached)
1907 self.assertEqual(cache_manager.file_count, n_retained)
1909 # The oldest files should no longer be in the cache.
1910 for i in range(n_datasets):
1911 with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
1912 if i >= n_datasets - n_retained:
1913 self.assertIsInstance(found, ResourcePath)
1914 else:
1915 self.assertIsNone(found)
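# Worked example: assertExpiration(cache_manager, 5, 3) moves refs 0-4
# into the cache and expects only the three newest (2, 3, 4) to remain,
# matching the "files" test above.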
1917 def testCacheExpiryAge(self) -> None:
1918 threshold = 1 # Expire files older than 1 second.
1919 mode = "age"
1920 config_str = self._expiration_config(mode, threshold)
1922 cache_manager = self._make_cache_manager(config_str)
1923 self.assertIn(f"{mode}={threshold}", str(cache_manager))
1925 # Insert 2 files, then sleep, then insert 4 more.
1926 for i in range(2):
1927 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1928 self.assertIsNotNone(cached)
1929 time.sleep(2.0)
1930 for j in range(4):
1931 i = 2 + j # Continue counting from the first loop.
1932 cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
1933 self.assertIsNotNone(cached)
1935 # Only the files written after the sleep should exist.
1936 self.assertEqual(cache_manager.file_count, 4)
1937 with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
1938 self.assertIsNone(found)
1939 with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
1940 self.assertIsInstance(found, ResourcePath)
1943class NullDatastoreTestCase(DatasetTestHelper, unittest.TestCase):
1944 """Test the null datastore."""
1946 storageClassFactory = StorageClassFactory()
1948 def test_basics(self) -> None:
1949 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
1950 ref = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {})
1952 null = NullDatastore(None, None)
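# A NullDatastore holds nothing and accepts nothing: existence checks
# return False, reads raise FileNotFoundError, and the mutating and bulk
# APIs raise NotImplementedError, as asserted below.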
1954 self.assertFalse(null.exists(ref))
1955 self.assertFalse(null.knows(ref))
1956 knows = null.knows_these([ref])
1957 self.assertFalse(knows[ref])
1958 null.validateConfiguration(ref)
1960 with self.assertRaises(FileNotFoundError):
1961 null.get(ref)
1962 with self.assertRaises(NotImplementedError):
1963 null.put("", ref)
1964 with self.assertRaises(FileNotFoundError):
1965 null.getURI(ref)
1966 with self.assertRaises(FileNotFoundError):
1967 null.getURIs(ref)
1968 with self.assertRaises(FileNotFoundError):
1969 null.getManyURIs([ref])
1970 with self.assertRaises(NotImplementedError):
1971 null.getLookupKeys()
1972 with self.assertRaises(NotImplementedError):
1973 null.import_records({})
1974 with self.assertRaises(NotImplementedError):
1975 null.export_records([])
1976 with self.assertRaises(NotImplementedError):
1977 null.export([ref])
1978 with self.assertRaises(NotImplementedError):
1979 null.transfer(null, ref)
1980 with self.assertRaises(NotImplementedError):
1981 null.emptyTrash()
1982 with self.assertRaises(NotImplementedError):
1983 null.trash(ref)
1984 with self.assertRaises(NotImplementedError):
1985 null.forget([ref])
1986 with self.assertRaises(NotImplementedError):
1987 null.remove(ref)
1988 with self.assertRaises(NotImplementedError):
1989 null.retrieveArtifacts([ref], ResourcePath("."))
1990 with self.assertRaises(NotImplementedError):
1991 null.transfer_from(null, [ref])
1992 with self.assertRaises(NotImplementedError):
1993 null.ingest()
1996class DatasetRefURIsTestCase(unittest.TestCase):
1997 """Tests for DatasetRefURIs."""
1999 def testSequenceAccess(self) -> None:
2000 """Verify that DatasetRefURIs can be treated like a two-item tuple."""
2001 uris = DatasetRefURIs()
2003 self.assertEqual(len(uris), 2)
2004 self.assertEqual(uris[0], None)
2005 self.assertEqual(uris[1], {})
2007 primaryURI = ResourcePath("1/2/3")
2008 componentURI = ResourcePath("a/b/c")
2010 # Affirm that DatasetRefURIs does not support MutableSequence item assignment.
2011 with self.assertRaises(TypeError):
2012 uris[0] = primaryURI
2013 with self.assertRaises(TypeError):
2014 uris[1] = {"foo": componentURI}
2016 # But DatasetRefURIs can be set by property name:
2017 uris.primaryURI = primaryURI
2018 uris.componentURIs = {"foo": componentURI}
2019 self.assertEqual(uris.primaryURI, primaryURI)
2020 self.assertEqual(uris[0], primaryURI)
2022 primary, components = uris
2023 self.assertEqual(primary, primaryURI)
2024 self.assertEqual(components, {"foo": componentURI})
2026 def testRepr(self) -> None:
2027 """Verify __repr__ output."""
2028 uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")})
2029 self.assertEqual(
2030 repr(uris),
2031 'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})',
2032 )
2035class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase):
2036 """Test the StoredFileInfo class."""
2038 storageClassFactory = StorageClassFactory()
2040 def test_StoredFileInfo(self) -> None:
2041 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
2042 ref = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {})
2044 record = dict(
2045 storage_class="StructuredDataDict",
2046 formatter="lsst.daf.butler.Formatter",
2047 path="a/b/c.txt",
2048 component="component",
2049 checksum=None,
2050 file_size=5,
2051 )
2052 info = StoredFileInfo.from_record(record)
2054 self.assertEqual(info.to_record(), record)
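# rebase() attaches the same stored-file record to a different ref;
# rebasing back to the original ref should round-trip to the original.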
2056 ref2 = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {})
2057 rebased = info.rebase(ref2)
2058 self.assertEqual(rebased.rebase(ref), info)
2060 with self.assertRaises(TypeError):
2061 rebased.update(formatter=42)
2063 with self.assertRaises(ValueError):
2064 rebased.update(something=42, new="42")
2066 # Check that pickle works on StoredFileInfo.
2067 pickled_info = pickle.dumps(info)
2068 unpickled_info = pickle.loads(pickled_info)
2069 self.assertEqual(unpickled_info, info)
2072@contextlib.contextmanager
2073def _temp_yaml_file(data: Any) -> Iterator[str]:
2074 fh = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml")
2075 try:
2076 yaml.dump(data, stream=fh)
2077 fh.flush()
2078 yield fh.name
2079 finally:
2080 # Some tests delete the file before the context manager exits.
2081 with contextlib.suppress(FileNotFoundError):
2082 fh.close()
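# A minimal usage sketch (the mapping here is hypothetical; the helper
# accepts any YAML-serializable object):
#
#     with _temp_yaml_file({"key": "value"}) as path:
#         with open(path) as fh:
#             assert yaml.safe_load(fh) == {"key": "value"}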
2085if __name__ == "__main__":
2086 unittest.main()