Coverage for tests/test_butler.py : 20%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import os
26import posixpath
27import unittest
28import tempfile
29import shutil
30import pickle
31import string
32import random
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # S3 support is optional: flag boto3 as unavailable so S3-dependent
    # tests can detect the condition, and provide a stand-in decorator.
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        return cls
46from lsst.utils import doImport
47from lsst.daf.butler.core.safeFileIo import safeMakeDir
48from lsst.daf.butler import Butler, Config, ButlerConfig
49from lsst.daf.butler import StorageClassFactory
50from lsst.daf.butler import DatasetType, DatasetRef
51from lsst.daf.butler import FileTemplateValidationError, ValidationError
52from lsst.daf.butler import FileDataset
53from lsst.daf.butler import CollectionSearch
54from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
55from lsst.daf.butler.core.location import ButlerURI
56from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
57 unsetAwsEnvCredentials)
59from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
61TESTDIR = os.path.abspath(os.path.dirname(__file__))
def makeExampleMetrics():
    """Build a MetricsExample populated with fixed example values."""
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)
class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        """Verify that searchPaths overrides change which configs are read."""
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")

        # Without an override directory, "testConfigs" must not appear in the
        # debug log of files consulted during construction.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            defaultConfig = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        # With the override directory on the search path it must appear.
        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            overriddenConfig = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        # The override directory supplies a different value for this key.
        key = ("datastore", "records", "table")
        self.assertNotEqual(defaultConfig[key], overriddenConfig[key])
        self.assertEqual(overriddenConfig[key], "override_record")
class ButlerPutGetTests:
    """Helper method for running a suite of put/get tests from different
    butler configurations."""

    # Filesystem root of the test repository; subclasses assign this in
    # their setUp, and tearDown removes it.
    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Build one StorageClassFactory per test class, seeded from the
        # butler config supplied by the concrete subclass (cls.configFile).
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        """Assert that each named component fetched via ``butler.get``
        equals the matching attribute of ``reference``.
        """
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        # Remove the temporary repository root, if one was created.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Run a standard suite of put/get/prune round trips.

        Parameters
        ----------
        storageClass : `StorageClass`
            Storage class for the dataset type being exercised.
        datasetTypeName : `str`
            Name under which the dataset type is registered.

        Returns
        -------
        butler : `Butler`
            The butler used, with one dataset left in place for downstream
            tests.
        """
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Butler construction registers the run and tagged collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.prune([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.prune([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.prune([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            butler.prune([compRef], unstore=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.prune([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        """Test supplying collections at call time instead of construction."""
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection.
        butler.prune([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    # If True, setUp creates a fresh temporary repository root per test;
    # otherwise the class-level configFile is used directly.
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        # A butler built from another butler shares its registry and
        # datastore but may use different collections.
        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        """Put/get round trip for a non-composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        """Put/get round trip for a concrete composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetVirtual(self):
        """Put/get round trip for a virtual composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runPutGetTest(storageClass, "test_metric_comp")

    def testIngest(self):
        """Test ingesting external files: one dataset per file and many
        datasets sharing a single file."""
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs: separate files mean separate URIs.
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both datasets came from the same file.
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second
        butler.prune([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        """Test dataset type registration and configuration validation."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are created
        # for each component. Need entries for each component in the test
        # configuration otherwise validation won't work. The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        """Test that a failing transaction rolls back both registry entries
        and datastore contents."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Everything inside the transaction is expected to be undone by the
        # deliberately raised TransactionTestError below.
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Create and register a DatasetType
                datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")

        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(datasetTypeName)
        with self.assertRaises(LookupError):
            butler.registry.expandDataId(dataId)
        # Should raise KeyError for missing DatasetType
        with self.assertRaises(KeyError):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        """Test that str(butler) includes the expected datastore and registry
        descriptions for this configuration."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)
class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location. For POSIXDatastore this test is equivalent
        to `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        """Test that file templates control where datasets are written and
        that non-unique templates are rejected."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        """Test round-tripping datasets through export and import."""
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets. We used TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            # Import while exportDir still exists so exportFile is readable.
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest/run")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Config key expected only in the fully-expanded (standalone) config.
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    # Substrings expected in str(butler) / the datastore name.
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"
class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # No file-system root, so the makeRepo test is skipped (see testMakeRepo).
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # File ingest does not apply to an in-memory datastore; override
        # the inherited test with a no-op to disable it.
        pass
class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """PosixDatastore specialization"""
    # NOTE(review): config chains an in-memory datastore with two POSIX
    # datastores, per the expected strings below.
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # Config key expected only in the fully-expanded (standalone) config.
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"
class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        """Create a repo in one directory and relocate its config file,
        with an explicit ``root`` entry, into a sibling directory.
        """
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # The repository proper lives under dir1.
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Copy the config into dir2, pointing its "root" back at dir1,
        # then delete the original so only the relocated copy exists.
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        originalConfigPath = os.path.join(self.dir1, "butler.yaml")
        relocatedConfig = Config(originalConfigPath)
        relocatedConfig["root"] = self.dir1
        relocatedConfigPath = os.path.join(self.dir2, "butler2.yaml")
        relocatedConfig.dumpToFile(relocatedConfigPath)
        os.remove(originalConfigPath)
        self.tmpConfigFile = relocatedConfigPath

    def testFileLocations(self):
        # Registry database stays with the repository root in dir1 ...
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
        # ... while the only surviving config file lives in dir2.
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertNotEqual(self.dir1, self.dir2)
class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Create a repo in one temp dir with its config written to another."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        # Remove the external config directory before the usual cleanup.
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        config = Config(self.tmpConfigFile)
        actualRoot = ButlerURI(config["root"])
        expectedRoot = ButlerURI(self.root)
        self.assertEqual(actualRoot.geturl(), expectedRoot.geturl())
        self.assertNotIn(":", actualRoot.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        sc = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(sc, "test_metric")
class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Create a repo, sending the config to a directory, not a file."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Pass the bare directory as the outfile destination.
        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.root2)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()
class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Create a repo with the external config given as a URI string."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        configPath = os.path.join(self.root2, "something.yaml")
        self.tmpConfigFile = ButlerURI(configPath).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # Fixed: this was a plain string, leaving "{bucketName}" and "{root}"
    # uninterpolated; made an f-string to match datastoreStr above.
    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    # Fixed: dropped a needless f-string prefix (no placeholders).
    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choices(string.ascii_uppercase + string.digits, k=20)
        )
        return rndstr + "/"

    def setUp(self):
        """Configure a mocked S3 bucket and create a repository in it."""
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        """Empty and delete the mocked bucket, then restore credentials."""
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        # Reuse the same Bucket handle to remove the bucket itself
        # (a second s3.Bucket() lookup here was redundant).
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For S3Datastore this test is equivalent to
        `lsst.daf.butler.core.s3utils.s3checkFileExists` call.
        """
        uri = ButlerURI(root)
        client = boto3.client("s3")
        return s3CheckFileExists(uri, client=client)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        # Known not to work for S3 yet; keep the inherited test marked as an
        # expected failure rather than skipping it outright.
        super().testImportExport()
# Allow the test suite to be run directly as a script.
if __name__ == "__main__":
    unittest.main()