Coverage for tests/test_butler.py : 20%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import os
26import posixpath
27import unittest
28import tempfile
29import shutil
30import pickle
31import string
32import random
33import numpy as np
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # The optional AWS test dependencies are not installed.  The S3 test
    # cases check ``boto3 is None`` and skip themselves accordingly.
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        return cls
47from lsst.utils import doImport
48from lsst.daf.butler.core.safeFileIo import safeMakeDir
49from lsst.daf.butler import Butler, Config, ButlerConfig
50from lsst.daf.butler import StorageClassFactory
51from lsst.daf.butler import DatasetType, DatasetRef
52from lsst.daf.butler import FileTemplateValidationError, ValidationError
53from lsst.daf.butler import FileDataset
54from lsst.daf.butler import CollectionSearch
55from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
56from lsst.daf.butler.core.location import ButlerURI
57from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
58 unsetAwsEnvCredentials)
60from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
# Absolute path of the directory holding this test file; used to locate the
# test configuration ("config/...") and test data ("data/...") directories.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
def makeExampleMetrics():
    """Return a small `MetricsExample` with fixed contents for put/get tests."""
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)
class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        """Check that extra search paths pull in configuration overrides."""
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")

        # Without search paths the override directory must not be consulted.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logCm:
            defaultConfig = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(logCm.output))

        # Supplying the override directory must show up in the debug logs...
        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logCm:
            overriddenConfig = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(logCm.output))

        # ...and change the value of at least one known key.
        key = ("datastore", "records", "table")
        self.assertNotEqual(defaultConfig[key], overriddenConfig[key])
        self.assertEqual(overriddenConfig[key], "override_record")
class ButlerPutGetTests:
    """Helper method for running a suite of put/get tests from different
    butler configurations.

    Subclasses must provide a ``configFile`` class attribute (path to the
    butler configuration) and set ``self.tmpConfigFile`` before the tests
    run — see `ButlerTests.setUp`.
    """

    # Repository root directory; created by subclasses' setUp and removed
    # again in tearDown.
    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Storage classes are shared by all tests in the class, so load the
        # storage class definitions from the test configuration only once.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        """Assert that each named component retrieved through ``butler``
        matches the corresponding attribute of ``reference``.
        """
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        """Remove the temporary repository root, if one was created."""
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise the full put/get/prune life cycle for one storage class.

        Returns the populated `Butler` so callers (e.g. testImportExport)
        can continue working with the repository.
        """
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Constructing the butler registers the run and tag collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.prune([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.prune([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.prune([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            butler.prune([compRef], unstore=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.prune([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        """Check that collections can be supplied per call instead of at
        Butler construction time.
        """
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection.
        butler.prune([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.

    Subclasses must define: ``configFile``, ``fullConfigKey``,
    ``validationCanFail``, ``datastoreStr``, ``datastoreName`` and
    ``registryStr`` (see the concrete TestCase classes below).
    """

    # When False, tests run directly against configFile without creating a
    # temporary repository (used by the in-memory datastore tests).
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        # A butler built from an existing butler shares registry/datastore.
        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        """Put/get with a storage class that has no components."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        """Put/get with a composite stored as a single concrete dataset."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetVirtual(self):
        """Put/get with a composite assembled from separate components."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runPutGetTest(storageClass, "test_metric_comp")

    def testIngest(self):
        """Ingest external files, both one-ref-per-file and multi-ref."""
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both refs came from the same file, so same URI.
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.prune([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        """Register several dataset types and validate the configuration."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are created
        # for each component. Need entries for each component in the test
        # configuration otherwise validation won't work. The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        """Check that a failed transaction rolls back registry and datastore."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Create and register a DatasetType
                datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")

        # After the rollback nothing inserted above should remain.
        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(datasetTypeName)
        with self.assertRaises(LookupError):
            butler.registry.expandDataId(dataId)
        # Should raise KeyError for missing DatasetType
        with self.assertRaises(KeyError):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        """Check str(butler) mentions the expected datastore and registry."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)
class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For POSIXDatastore this test is equivalent
        to `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        """Check that file templates produce the expected paths on put."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        # np.int64 deliberately exercises non-native integer types in dataIds.
        dataId1 = {"instrument": "DummyCamComp", "visit": np.int64(423)}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        # The non-unique template must refuse to overwrite the first file.
        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        """Round-trip datasets through export and import into a new repo."""
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets.  We used TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest/run")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Key present only in a fully-expanded config; used by testMakeRepo.
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    # Expected substrings checked by testStringification.
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"
class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # No file-system root, so makeRepo-related checks are disabled.
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    # Expected substrings checked by testStringification.
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    # Override ButlerTests.testIngest: an in-memory datastore has no file
    # system and cannot ingest external files.  Skipping (rather than the
    # previous silent ``pass``) makes the intent visible in the test report
    # instead of counting a do-nothing method as a passing test.
    @unittest.skip("InMemoryDatastore cannot ingest files")
    def testIngest(self):
        pass
class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler.

    The chain combines an in-memory datastore with POSIX datastores, as
    reflected in the expected ``datastoreStr``/``datastoreName`` values
    below (configured in config/basic/butler-chained.yaml).
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # The formatter config lives on the second datastore in the chain.
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    # Expected substrings checked by testStringification.
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"
class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Check that a butler YAML file stored away from the repository can
    refer back to the repository through an explicit "root" entry.
    """

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create a repository under the first directory.
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Relocate the YAML config into a second directory under a different
        # name, recording the repository location in its "root" key, and
        # remove the original so only the relocated copy remains.
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        originalConfigPath = os.path.join(self.dir1, "butler.yaml")
        relocatedConfig = Config(originalConfigPath)
        relocatedConfig["root"] = self.dir1
        movedConfigPath = os.path.join(self.dir2, "butler2.yaml")
        relocatedConfig.dumpToFile(movedConfigPath)
        os.remove(originalConfigPath)
        self.tmpConfigFile = movedConfigPath

    def testFileLocations(self):
        # The config lives in dir2; the repository contents stay in dir1.
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # The repository goes in one scratch directory; its config file is
        # written into a second, unrelated directory.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)
        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)

    def tearDown(self):
        # Remove the directory holding the out-of-repo config; missing paths
        # are tolerated via ignore_errors. The base class cleans up the
        # repository root itself.
        shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        # The "root" recorded in the external config must resolve to the
        # actual repository root.
        configuredRoot = ButlerURI(Config(self.tmpConfigFile)["root"])
        expectedRoot = ButlerURI(self.root)
        self.assertEqual(configuredRoot.geturl(), expectedRoot.geturl())
        self.assertNotIn(":", configuredRoot.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        metricClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(metricClass, "test_metric")
class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)
        # Unlike the parent test, outfile is a bare directory rather than a
        # file name.
        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()
class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)
        # Unlike the parent test, outfile is handed over as a URI string
        # rather than a plain filesystem path.
        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # Fixed: this was a plain string carrying literal "{bucketName}" and
    # "{root}" placeholders (missing f-prefix); now interpolated like
    # datastoreStr above. setUp() overrides it with the actual root URI.
    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    # Fixed: was an f-string with no placeholders.
    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 (plus a trailing "/") to serve
        as a root name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        # The bucket name comes from the datastore root URI in the config.
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        # Empty the mock bucket before deleting it; S3 refuses to delete a
        # non-empty bucket.
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For S3Datastore this test is equivalent to
        `lsst.daf.butler.core.s3utils.s3checkFileExists` call.
        """
        # NOTE(review): ``relpath`` is never used here — only ``root`` is
        # checked, unlike a Posix counterpart that would join the two.
        # Confirm whether callers pass the full object path as ``root``.
        uri = ButlerURI(root)
        client = boto3.client("s3")
        return s3CheckFileExists(uri, client=client)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        # Known not to work yet for the S3 datastore; kept as an expected
        # failure so a future fix is noticed.
        super().testImportExport()
if __name__ == "__main__":
    # Allow this test module to be run directly as a script.
    unittest.main()