Coverage report for tests/test_butler.py: 20% of lines covered.

Hot-keys on this page:
    r, m, x, p : toggle line displays
    j, k       : next / previous highlighted chunk
    0 (zero)   : jump to the top of the page
    1 (one)    : jump to the first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import os
26import posixpath
27import unittest
28import tempfile
29import shutil
30import pickle
31import string
32import random
33import numpy as np
# The AWS/S3 test dependencies are optional: fall back gracefully when
# boto3/moto are not installed so the rest of the test suite still runs.
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # Sentinel checked by S3-dependent tests to decide whether to skip.
    boto3 = None
    # NOTE(review): ``botocore`` is left undefined on this path; code in this
    # module should only ever test ``boto3 is None`` -- confirm.

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        return cls
47from lsst.utils import doImport
48from lsst.daf.butler.core.safeFileIo import safeMakeDir
49from lsst.daf.butler import Butler, Config, ButlerConfig
50from lsst.daf.butler import StorageClassFactory
51from lsst.daf.butler import DatasetType, DatasetRef
52from lsst.daf.butler import FileTemplateValidationError, ValidationError
53from lsst.daf.butler import FileDataset
54from lsst.daf.butler import CollectionSearch
55from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
56from lsst.daf.butler.core.location import ButlerURI
57from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
58 unsetAwsEnvCredentials)
60from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
# Absolute path of the directory containing this test file; used to locate
# the config and data files shipped alongside the tests.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
def makeExampleMetrics():
    """Return a `MetricsExample` populated with fixed example values."""
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)
class TransactionTestError(Exception):
    """Dedicated exception used to trigger transaction rollbacks in tests.

    A purpose-built error type avoids the misdiagnosis that could occur
    if a standard exception were raised for the same purpose.
    """
class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        """Check that searchPaths overrides are applied and logged."""
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")

        # Without a search path, the override directory must not be consulted.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logs:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(logs.output))

        # With the override directory on the search path, it must be used.
        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logs:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(logs.output))

        # The override config must change this specific key.
        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")
class ButlerPutGetTests:
    """Helper method for running a suite of put/get tests from different
    butler configurations.

    Mixin: subclasses must provide ``configFile`` (class attribute) and set
    ``self.tmpConfigFile`` before the tests run.
    """

    # Repository root created per-test; removed in tearDown when set.
    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Storage classes come from the subclass-provided config file.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        """Assert that each named component retrieved through ``butler``
        matches the corresponding attribute of ``reference``.
        """
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        # Remove the per-test repository root, if one was created.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise the full put/get/prune lifecycle for one dataset type.

        Parameters
        ----------
        storageClass : `StorageClass`
            Storage class to associate with the new dataset type.
        datasetTypeName : `str`
            Name of the dataset type to create and register.

        Returns
        -------
        butler : `Butler`
            The butler used for the test, left with one dataset in place
            for downstream tests.
        """
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # There will not be a collection yet
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.prune([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.prune([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.prune([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            butler.prune([compRef], unstore=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.prune([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        """Check that collections can be supplied per-call rather than at
        Butler construction time.
        """
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection.
        butler.prune([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.

    Subclasses provide ``configFile`` and the various ``datastoreStr``,
    ``datastoreName``, ``registryStr``, ``fullConfigKey`` and
    ``validationCanFail`` class attributes used below.
    """

    # When False, tests run directly against configFile without creating a
    # temporary repository (used by the in-memory datastore tests).
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        # A butler built from another butler shares registry and datastore.
        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        """Put/get with a non-composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        """Put/get with a composite stored as a single concrete dataset."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetVirtual(self):
        """Put/get with a composite assembled from stored components."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runPutGetTest(storageClass, "test_metric_comp")

    def testIngest(self):
        """Ingest external files, both one-file-per-dataset and
        multiple-datasets-per-file.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both datasets come from the same file.
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second
        butler.prune([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        """Register several dataset types and validate the configuration
        against them.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are created
        # for each component. Need entries for each component in the test
        # configuration otherwise validation won't work. The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        """Check that an exception inside a butler transaction rolls back
        registry and datastore changes together.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Create and register a DatasetType
                datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")

        # Everything added inside the transaction must now be gone.
        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(datasetTypeName)
        with self.assertRaises(LookupError):
            butler.registry.expandDataId(dataId)
        # Should raise KeyError for missing DatasetType
        with self.assertRaises(KeyError):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        """Check str(butler) against subclass-provided expected fragments."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)
class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location. For POSIXDatastore this test is equivalent
        to `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        """Check that file templates control the physical file layout and
        that a non-unique template is rejected by validation.
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        # np.int64 checks that numpy integers are accepted as data ID values.
        dataId1 = {"instrument": "DummyCamComp", "visit": np.int64(423)}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        """Round-trip datasets through export and import into a fresh repo."""
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets. We used TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            # Import must happen while exportFile still exists, so this
            # block stays nested inside the export directory's context.
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest/run")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Key that should only be present once defaults are merged in
    # (see testMakeRepo).
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    # Fragments expected in str(butler) and in the datastore name
    # (see testStringification).
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"
class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # No merged-defaults key and no temp root: there is no file-system
    # repository for an in-memory datastore.
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # File ingest does not apply to an in-memory datastore; disable the
        # inherited test.
        pass
class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler (an in-memory datastore
    chained with two POSIX datastores)."""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # Formatters key of the second child datastore, present only after
    # defaults are merged (see testMakeRepo).
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"
class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create a repository in its own directory.
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Relocate the config: load it, record the true repository root,
        # dump it under a different name in a second directory, then delete
        # the original so only the relocated copy can be found.
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        originalConfigPath = os.path.join(self.dir1, "butler.yaml")
        relocatedConfig = Config(originalConfigPath)
        relocatedConfig["root"] = self.dir1
        relocatedConfigPath = os.path.join(self.dir2, "butler2.yaml")
        relocatedConfig.dumpToFile(relocatedConfigPath)
        os.remove(originalConfigPath)
        self.tmpConfigFile = relocatedConfigPath

    def testFileLocations(self):
        # The config must exist only in dir2 while the registry database
        # remains with the repository in dir1.
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Two scratch areas: one for the repository itself and one to
        # receive the externally-written config file.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Ask makeRepo to write the config under a non-default name,
        # outside the repository root.
        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        # Remove the external config area; the base class cleans up the
        # repository root itself.
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        # The externally-written config must record the real repo root.
        cfg = Config(self.tmpConfigFile)
        actualRootUri = ButlerURI(cfg["root"])
        expectedRootUri = ButlerURI(self.root)
        self.assertEqual(actualRootUri.geturl(), expectedRootUri.geturl())
        self.assertNotIn(":", actualRootUri.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")
class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Hand makeRepo an existing directory, not a file name, as the
        # outfile target.
        self.tmpConfigFile = self.root2
        repoConfig = Config(self.configFile)
        Butler.makeRepo(self.root, config=repoConfig, outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()
class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Express the outfile location as a URI rather than a plain path.
        outfilePath = os.path.join(self.root2, "something.yaml")
        self.tmpConfigFile = ButlerURI(outfilePath).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # Bug fix: the "f" prefix was missing, so the "{bucketName}" and "{root}"
    # placeholders were previously left as literal text instead of being
    # substituted with the class attributes defined above.
    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    # Plain literal: there are no placeholders, so no f-string is needed.
    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        """Create a mock S3 bucket and make a butler repository inside it."""
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        # Consistency fix: keep this a list like the class-level attribute
        # (and like every sibling test case), since the inherited checks
        # iterate over the expected substrings.
        self.datastoreStr = [f"datastore={self.root}"]
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        """Empty and delete the mock bucket and clear any dummy credentials."""
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The keys were not reachable - that is fine; the bucket
                # is already empty.
                pass
            else:
                raise

        # A bucket must be empty before it can be deleted.
        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For S3Datastore this test is equivalent to
        `lsst.daf.butler.core.s3utils.s3CheckFileExists` call.

        NOTE(review): ``relpath`` is ignored here; callers appear to pass the
        full location as ``root`` — confirm against testPutTemplates.
        """
        uri = ButlerURI(root)
        client = boto3.client("s3")
        return s3CheckFileExists(uri, client=client)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        # Export/import does not currently work for the S3 datastore,
        # hence the expectedFailure marker.
        super().testImportExport()
# Allow the test module to be run directly as a script.
if __name__ == "__main__":
    unittest.main()