Coverage for tests/test_butler.py : 20%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import os
26import posixpath
27import unittest
28import tempfile
29import shutil
30import pickle
31import string
32import random
33import numpy as np
# Optional S3 support: boto3/botocore provide the AWS API and moto provides
# the mock_s3 decorator used by S3-backed datastore tests.
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # boto3 = None is the sentinel tests check to decide whether to skip
    # S3-dependent test cases.
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        return cls
47from lsst.utils import doImport
48from lsst.daf.butler.core.safeFileIo import safeMakeDir
49from lsst.daf.butler import Butler, Config, ButlerConfig
50from lsst.daf.butler import StorageClassFactory
51from lsst.daf.butler import DatasetType, DatasetRef
52from lsst.daf.butler import FileTemplateValidationError, ValidationError
53from lsst.daf.butler import FileDataset
54from lsst.daf.butler import CollectionSearch
55from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
56from lsst.daf.butler.core.location import ButlerURI
57from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
58 unsetAwsEnvCredentials)
60from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
# Absolute path of the directory containing this test file; used to locate
# test configuration ("config/...") and test data ("data/...") files.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
def makeExampleMetrics():
    """Return a `MetricsExample` populated with fixed example values.

    The three arguments correspond to the summary mapping, the output
    mapping, and the data sequence used throughout these tests.
    """
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)
class TransactionTestError(Exception):
    """Exception raised deliberately inside transaction tests.

    A dedicated exception type is used (instead of a built-in one) so that
    the tests cannot misdiagnose an unrelated failure as the intentional
    transaction rollback trigger.
    """
class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        """Verify that extra search paths override values in the config."""
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")

        # Without extra search paths the override directory must not be read.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logCm:
            baseConfig = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(logCm.output))

        # With the override directory in the search path it must be read.
        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logCm:
            overriddenConfig = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(logCm.output))

        # The overridden value must differ from the base value.
        key = ("datastore", "records", "table")
        self.assertNotEqual(baseConfig[key], overriddenConfig[key])
        self.assertEqual(overriddenConfig[key], "override_record")
class ButlerPutGetTests:
    """Helper method for running a suite of put/get tests from different
    butler configurations."""

    # Repository root directory; when set, tearDown removes it.
    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Storage classes are shared by all tests; cls.configFile is supplied
        # by the concrete subclass.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        """Assert that each named component retrieved through the butler
        equals the matching attribute of ``reference``."""
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        # Remove the temporary repository root, if one was created.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise put/get/prune round trips for the given storage class and
        dataset type name; returns the populated Butler for reuse."""
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # There will not be a collection yet
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.prune([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.prune([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.prune([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            butler.prune([compRef], unstore=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.prune([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        """Collections passed per-call (not at construction) must work for
        put, get, and existence checks."""
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection.
        butler.prune([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    # When True, each test gets a fresh temporary repository root.
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            # e.g. in-memory datastore: reuse the static config directly.
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        # Constructing from an existing butler shares registry and datastore.
        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        """Put/get round trip with a non-composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        """Put/get round trip with a concrete composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetVirtual(self):
        """Put/get round trip with a virtual composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runPutGetTest(storageClass, "test_metric_comp")

    def testIngest(self):
        """Ingest external files, both one file per dataset and one file
        shared by multiple datasets."""
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both datasets come from the same file.
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second
        butler.prune([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        """Register several dataset types and validate the configuration
        against them."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are created
        # for each component.  Need entries for each component in the test
        # configuration otherwise validation won't work.  The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        """A failed transaction must roll back registry and datastore
        changes made inside it."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Create and register a DatasetType
                datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")

        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(datasetTypeName)
        with self.assertRaises(LookupError):
            butler.registry.expandDataId(dataId)
        # Should raise KeyError for missing DatasetType
        with self.assertRaises(KeyError):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        """str(butler) and the datastore name must contain the substrings
        declared by the concrete subclass."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)
class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location. For POSIXDatastore this test is equivalent
        to `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        """Verify that file templates control where datasets land on disk
        and that a non-unique template fails validation."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        # np.int64 checks that non-builtin integer types work in data IDs.
        dataId1 = {"instrument": "DummyCamComp", "visit": np.int64(423)}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions.  This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        # A second visit with the same (non-unique) template collides.
        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        """Export datasets from one repo and import them into a fresh one."""
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets.  We used TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            # Import must happen while exportFile still exists, i.e. inside
            # the export directory's context.
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest/run")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    # Butler configuration used to create each test repository.
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Config key expected only in a fully-expanded config (see testMakeRepo).
    fullConfigKey = ".datastore.formatters"
    # This configuration is expected to fail validateConfiguration().
    validationCanFail = True
    # Substrings expected in str(butler) / the datastore name.
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"
class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # No file-system root: fullConfigKey=None makes testMakeRepo a no-op.
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # File ingest does not apply to an in-memory datastore, so disable
        # the inherited test.
        pass
class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler (an in-memory datastore
    chained with two Posix datastores)."""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # Key path indexes into the second datastore of the chain.
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"
class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        # Parent temporary directory holding both locations.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create the actual repository under dir1.
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Relocate the yaml config into dir2, recording the true root in it,
        # and remove the original so only the relocated copy remains.
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        originalConfigPath = os.path.join(self.dir1, "butler.yaml")
        relocatedConfig = Config(originalConfigPath)
        relocatedConfig["root"] = self.dir1
        relocatedConfigPath = os.path.join(self.dir2, "butler2.yaml")
        relocatedConfig.dumpToFile(relocatedConfigPath)
        os.remove(originalConfigPath)
        self.tmpConfigFile = relocatedConfigPath

    def testFileLocations(self):
        # The config must live in dir2 while the repository contents
        # (e.g. the registry database) stay in dir1.
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # One directory for the repository, a second for the external config.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        # Remove the external-config directory (tolerating its absence),
        # then run the inherited cleanup.
        shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        # The externally written config must record the true repository root.
        writtenConfig = Config(self.tmpConfigFile)
        actualUri = ButlerURI(writtenConfig["root"])
        expectedUri = ButlerURI(self.root)
        self.assertEqual(actualUri.geturl(), expectedUri.geturl())
        self.assertNotIn(":", actualUri.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        # Exercise the basic put/get round trip via the shared helper.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")
class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Hand makeRepo a directory, not a file name, as the outfile target.
        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # The Config constructor cannot infer the file type from a bare
        # directory, so point it at the yaml file inside before delegating
        # to the base-class check.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()
class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Express the outfile location as a URI rather than a plain path.
        configPath = os.path.join(self.root2, "something.yaml")
        self.tmpConfigFile = ButlerURI(configPath).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # Fixed: this string was missing its f-prefix, so the {bucketName} and
    # {root} placeholders were left unexpanded. setUp() overrides this value
    # per-test in any case.
    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    # Plain string: there are no placeholders to interpolate.
    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        """Create a mock S3 bucket and a butler repository rooted in it."""
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        # NOTE(review): datastoreStr becomes a plain string here although the
        # class attribute is a list -- confirm how the shared tests consume it.
        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        """Empty and delete the mock bucket, then drop dummy credentials."""
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        # The same Bucket handle can be reused to delete the bucket itself;
        # the previous second s3.Bucket() call was redundant.
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For S3Datastore this test is equivalent to
        `lsst.daf.butler.core.s3utils.s3checkFileExists` call.
        """
        # NOTE(review): relpath is unused here; callers appear to pass a root
        # that already includes the file path -- confirm against the base
        # class's usage.
        uri = ButlerURI(root)
        client = boto3.client("s3")
        return s3CheckFileExists(uri, client=client)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        # Import/export currently fails for the S3 datastore, hence the
        # expectedFailure marker; the test body lives in the base class.
        super().testImportExport()
if __name__ == "__main__":
    # Run the full test suite when this file is executed directly.
    unittest.main()