Coverage for tests/test_butler.py : 20%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import os
26import posixpath
27import unittest
28import tempfile
29import shutil
30import pickle
31import string
32import random
# boto3/botocore/moto are optional test-only dependencies: the S3-backed
# tests are skipped when they are unavailable.
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # NOTE(review): only boto3 is reset to None here; any code gating on
    # `botocore` after a failed import would raise NameError — confirm the
    # skip conditions elsewhere in this file test `boto3 is None` only.
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        return cls
46from lsst.utils import doImport
47from lsst.daf.butler.core.safeFileIo import safeMakeDir
48from lsst.daf.butler import Butler, Config, ButlerConfig
49from lsst.daf.butler import StorageClassFactory
50from lsst.daf.butler import DatasetType, DatasetRef
51from lsst.daf.butler import FileTemplateValidationError, ValidationError
52from lsst.daf.butler import FileDataset
53from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
54from lsst.daf.butler.core.location import ButlerURI
55from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
56 unsetAwsEnvCredentials)
58from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
# Absolute path of the directory holding this test module; used to locate
# the test config files and test data shipped alongside the tests.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
def makeExampleMetrics():
    """Return a `MetricsExample` populated with fixed example values.

    The same literal values are used by several tests below as the
    reference payload for put/get round trips.
    """
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)
class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        """A searchPaths override directory must be consulted (and logged)
        and must change the resulting configuration values."""
        baseConfigPath = os.path.join(TESTDIR, "config", "basic", "butler.yaml")

        # Without an override directory nothing from testConfigs is read.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logCapture:
            defaultConfig = ButlerConfig(baseConfigPath)
        self.assertNotIn("testConfigs", "\n".join(logCapture.output))

        # With the override directory on the search path it must be read.
        overridePath = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logCapture:
            overriddenConfig = ButlerConfig(baseConfigPath, searchPaths=[overridePath])
        self.assertIn("testConfigs", "\n".join(logCapture.output))

        # The override must actually change this key's value.
        key = ("datastore", "records", "table")
        self.assertNotEqual(defaultConfig[key], overriddenConfig[key])
        self.assertEqual(overriddenConfig[key], "override_record")
class ButlerPutGetTests:
    """Helper method for running a suite of put/get tests from different
    butler configurations."""

    # Filesystem root of the test repository; set by subclasses' setUp(),
    # left as None when there is nothing on disk to clean up.
    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it in the given registry,
        returning the new DatasetType.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Storage classes come from the (subclass-provided) configFile.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        """Assert that each named component of ``datasetRef`` can be fetched
        and equals the matching attribute of ``reference``."""
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        # Remove the per-test repository root, if one was created on disk.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise the full put/get/remove life cycle for one dataset type.

        Returns the populated Butler so callers can run further checks.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")

        # There will not be a collection yet
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, set())

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from collection only; after that we shouldn't be able
                # to find it unless we use the dataset_id.
                butler.remove(*args, delete=False)
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(butler.collection, [ref])
                butler.remove(*args)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Put again, then remove completely (this generates a new
                # dataset record in registry, with a new ID - the old one
                # still exists but it is not in any collection so we don't
                # care).
                ref = butler.put(metric, *args)
                butler.remove(*args, remember=False)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            butler.remove(compNameS, dataId, remember=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, {"ingest", })

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.remove(ref.datasetType.name, dataId)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        """Test that a collection/run can be supplied per call instead of at
        Butler construction time.

        NOTE(review): this code was unreachable in the original extract — it
        followed the ``return butler`` of runPutGetTest with no enclosing
        ``def``, so the method header had been lost.  It is restored here
        under the upstream name; confirm against the repository history.
        """
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collection=run))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collection=run))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collection=run).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.remove(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection but without a Datastore entry.
        butler.remove(datasetType, dataId, collection="tagged")
        self.assertFalse(butler.datasetExists(datasetType, dataId, collection=run))
class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    # Whether setUp() should create a fresh on-disk repo root per test;
    # subclasses with no file-system root (e.g. in-memory) set this False.
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            # No disk root: use the subclass configuration file directly.
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        # A fresh repo has no collections until something is stored.
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, set())

        # Constructing from an existing butler shares registry and datastore
        # but may override the collection; run is not inherited.
        butler2 = Butler(butler=butler, collection="other")
        self.assertEqual(butler2.collection, "other")
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        """Put/get round trip with a non-composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        """Put/get round trip with a concrete composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetVirtual(self):
        """Put/get round trip with a virtual composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runPutGetTest(storageClass, "test_metric_comp")

    def testIngest(self):
        """Test ingesting external files, both one ref per file and many
        refs sharing a single file."""
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            # NOTE(review): detector_name is assigned but unused in this loop.
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both refs come from the same ingested file.
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second
        butler.remove(datasetTypeName, dataId1)
        with self.assertRaises(LookupError):
            butler.datasetExists(datasetTypeName, dataId1)
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        # Round-tripped butler must keep its config, collection and run.
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collection, butler.collection)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        """Test dataset-type registration plus configuration validation."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are created
        # for each component.  Need entries for each component in the test
        # configuration otherwise validation won't work. The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = butler.registry.getAllDatasetTypes()
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        # Subclasses whose datastore cannot fail validation set this False.
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        """Test that a failed transaction rolls back registry and datastore
        changes together."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Create and register a DatasetType
                datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")

        # After rollback nothing created inside the transaction may remain.
        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(datasetTypeName)
        with self.assertRaises(LookupError):
            butler.registry.expandDataId(dataId)
        # Should raise KeyError for missing DatasetType
        with self.assertRaises(KeyError):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.find(butler.collection, datasetType, dataId))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        # standalone=True expands defaults into the written config.
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = butler1.registry.getAllCollections()
        self.assertEqual(collections1, set())
        self.assertEqual(butler2.registry.getAllCollections(), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        # Making the repo again without overwrite must refuse to clobber it.
        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        """Test that str(butler) mentions the expected datastore and
        registry fragments declared by the subclass."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)
class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For POSIXDatastore this test is equivalent
        to `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        """Test that file templates place datasets at the expected paths and
        that a non-unique template is rejected."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        # A second put with the colliding template must refuse to overwrite.
        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        """Round-trip datasets through export to YAML and import into a
        brand-new repository."""
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets.  We used TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    # Configuration used to build the test repository.
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Key expected only in a fully-expanded (standalone) config.
    fullConfigKey = ".datastore.formatters"
    # This datastore configuration can fail validateConfiguration().
    validationCanFail = True
    # Substrings expected in str(butler) / datastore name.
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"
class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # No file-system root: disables testMakeRepo and the temp root.
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # Ingest of external files is not applicable to an in-memory
        # datastore; deliberately override the inherited test with a no-op.
        pass
class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore (in-memory + two Posix child datastores)
    specialization of a butler."""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # Index into the chained child datastores for the expanded-config key.
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"
class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        """Create a repo in dir1 but relocate its config (with an explicit
        "root" entry) to dir2."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToFile(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        """Sanity-check where the config and registry files ended up."""
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Two independent temp areas: one for the repo itself, one for the
        # externally-written configuration file.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)
        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        # Remove the external config area; the repo area is cleaned up by
        # the parent class.
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        """The externally-written config must point back at the repo root."""
        cfg = Config(self.tmpConfigFile)
        rootFromConfig = ButlerURI(cfg["root"])
        expectedRoot = ButlerURI(self.root)
        self.assertEqual(rootFromConfig.geturl(), expectedRoot.geturl())
        self.assertNotIn(":", rootFromConfig.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        """Round-trip a simple metric through the relocated-config butler."""
        metricsClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(metricsClass, "test_metric")
class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Hand makeRepo a directory (not a file name) as the outfile; it
        # is expected to place a butler.yaml inside it.
        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Point at the concrete yaml file so the Config constructor can
        # determine the file type, then reuse the parent-class checks.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()
class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Express the outfile location as a URI rather than a plain path.
        configPath = os.path.join(self.root2, "something.yaml")
        self.tmpConfigFile = ButlerURI(configPath).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # NOTE(review): this must be an f-string so the class-level bucketName
    # and root are interpolated, matching datastoreStr above; previously the
    # {bucketName}/{root} placeholders were left literal.
    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    # Plain string: no placeholders, so no f-string is needed (F541).
    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        # Trailing slash marks this as a "directory" key prefix in S3.
        return rndstr + "/"

    def setUp(self):
        # Read the bucket name from the test configuration rather than
        # relying on the class-level default.
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        # Keep the same container types as the class-level declarations:
        # datastoreStr is a list of expected substrings (previously this
        # was assigned a bare string, inconsistent with the class attribute).
        self.datastoreStr = [f"datastore={self.root}"]
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        """Empty and delete the mock bucket, then drop dummy credentials."""
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        # Bucket must be empty (handled above) before it can be deleted.
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For S3Datastore this test is equivalent to
        `lsst.daf.butler.core.s3utils.s3CheckFileExists` call.
        """
        uri = ButlerURI(root)
        client = boto3.client("s3")
        return s3CheckFileExists(uri, client=client)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        # Export/import is not yet supported for the S3 datastore; keep the
        # inherited test wired up but mark it as an expected failure.
        super().testImportExport()
if __name__ == "__main__":
    # Allow this test module to be run directly as a script.
    unittest.main()