Coverage for tests/test_butler.py : 18%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import os
26import posixpath
27import unittest
28import tempfile
29import shutil
30import pickle
31import string
32import random
33import time
34import socket
36try:
37 import boto3
38 import botocore
39 from moto import mock_s3
40except ImportError:
41 boto3 = None
    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.

        Returns the decorated class unchanged so that test classes wrapped
        with ``@mock_s3`` still import (and simply run without S3 mocking)
        when ``moto`` is not installed.
        """
        return cls
48try:
49 from cheroot import wsgi
50 from wsgidav.wsgidav_app import WsgiDAVApp
51except ImportError:
52 WsgiDAVApp = None
54import astropy.time
55from threading import Thread
56from tempfile import gettempdir
57from lsst.utils import doImport
58from lsst.daf.butler.core.utils import safeMakeDir
59from lsst.daf.butler import Butler, Config, ButlerConfig
60from lsst.daf.butler import StorageClassFactory
61from lsst.daf.butler import DatasetType, DatasetRef
62from lsst.daf.butler import FileTemplateValidationError, ValidationError
63from lsst.daf.butler import FileDataset
64from lsst.daf.butler import CollectionSearch, CollectionType
65from lsst.daf.butler import ButlerURI
66from lsst.daf.butler import script
67from lsst.daf.butler.registry import MissingCollectionError, OrphanedRecordError
68from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
69from lsst.daf.butler.core.s3utils import (setAwsEnvCredentials,
70 unsetAwsEnvCredentials)
71from lsst.daf.butler.core.webdavutils import isWebdavEndpoint
73from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
75TESTDIR = os.path.abspath(os.path.dirname(__file__))
def makeExampleMetrics():
    """Return a fixed `MetricsExample` instance used as the test payload."""
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)
class TransactionTestError(Exception):
    """Specific error raised to test transaction rollback.

    A dedicated exception type prevents misdiagnosing failures that might
    otherwise occur when a standard exception is used.
    """
class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        """Check that an explicit search path overrides the base config."""
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")

        # Without search paths the override directory must not be consulted.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            defaultConfig = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        # With the override directory supplied it should appear in the
        # debug log and change the resulting configuration.
        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            overriddenConfig = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        # This particular datastore setting is what the override changes.
        key = ("datastore", "records", "table")
        self.assertNotEqual(defaultConfig[key], overriddenConfig[key])
        self.assertEqual(overriddenConfig[key], "override_record")
class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests from different
    butler configurations.

    Subclasses are expected to provide ``configFile`` and ``tmpConfigFile``
    attributes pointing at the butler configuration to test.
    """

    # Repository root directory; set by subclasses, removed in tearDown.
    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it in the given registry.

        Returns the newly created `DatasetType`.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Storage class definitions are shared by every test in the suite,
        # so load them once from the subclass-provided config file.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        """Assert that each named component retrieved through ``butler.get``
        equals the matching attribute of ``reference``.
        """
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        # Remove the temporary repository created for the test, if any.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise put/get/prune round trips for one storage class.

        Creates a butler with a run and a tagged collection, registers the
        dataset type and required dimension records, then walks the dataset
        through put, get (by ref, by name, deferred), component access,
        pruning at several levels, and parameterized gets.

        Returns the `Butler` so callers can perform further checks; a
        dataset is left in place because some downstream tests require it.
        """
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Constructing the butler should have registered both collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        # Only the data list is affected by the slice parameter.
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                # Derived component computed from the data length.
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                # Derived component must honor the slice parameter too.
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        """Test passing run/collections at call time rather than to the
        Butler constructor.
        """
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
377class ButlerTests(ButlerPutGetTests):
378 """Tests for Butler.
379 """
380 useTempRoot = True
382 def setUp(self):
383 """Create a new butler root for each test."""
384 if self.useTempRoot:
385 self.root = tempfile.mkdtemp(dir=TESTDIR)
386 Butler.makeRepo(self.root, config=Config(self.configFile))
387 self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
388 else:
389 self.root = None
390 self.tmpConfigFile = self.configFile
392 def testConstructor(self):
393 """Independent test of constructor.
394 """
395 butler = Butler(self.tmpConfigFile, run="ingest")
396 self.assertIsInstance(butler, Butler)
398 collections = set(butler.registry.queryCollections())
399 self.assertEqual(collections, {"ingest"})
401 butler2 = Butler(butler=butler, collections=["other"])
402 self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
403 self.assertIsNone(butler2.run)
404 self.assertIs(butler.registry, butler2.registry)
405 self.assertIs(butler.datastore, butler2.datastore)
407 def testBasicPutGet(self):
408 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
409 self.runPutGetTest(storageClass, "test_metric")
    def testCompositePutGetConcrete(self):
        """Test composite storage class that must *not* be disassembled,
        checking that a single URI (not per-component URIs) is returned.
        """

        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset: visit 424 was never stored, so the URI must be
        # flagged with the "predicted" fragment.
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
    def testCompositePutGetVirtual(self):
        """Test composite storage class that is expected to be disassembled
        into per-component files (except for in-memory datastores, which
        never disassemble).
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            # Disassembled: no composite URI, one URI per component.
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset for the never-stored visit 424; URIs should carry
        # the "predicted" fragment.
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")
    def testIngest(self):
        """Test file ingest: one file per dataset, then multiple datasets
        sharing a single file, then pruning one of the shared refs.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        # One file per detector for visit 423.
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs: distinct files means distinct URIs.
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both datasets came from the same file.
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)
567 def testPruneCollections(self):
568 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
569 butler = Butler(self.tmpConfigFile, writeable=True)
570 # Load registry data with dimensions to hang datasets off of.
571 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
572 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
573 # Add some RUN-type collections.
574 run1 = "run1"
575 butler.registry.registerRun(run1)
576 run2 = "run2"
577 butler.registry.registerRun(run2)
578 # put some datasets. ref1 and ref2 have the same data ID, and are in
579 # different runs. ref3 has a different data ID.
580 metric = makeExampleMetrics()
581 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
582 datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
583 butler.registry)
584 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
585 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
586 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
588 # Add a new dataset type and delete it
589 tmpName = "prune_collections_disposable"
590 tmpDatasetType = self.addDatasetType(tmpName, dimensions, storageClass,
591 butler.registry)
592 tmpFromRegistry = butler.registry.getDatasetType(tmpName)
593 self.assertEqual(tmpDatasetType, tmpFromRegistry)
594 butler.registry.removeDatasetType(tmpName)
595 with self.assertRaises(KeyError):
596 butler.registry.getDatasetType(tmpName)
597 # Removing a second time is fine
598 butler.registry.removeDatasetType(tmpName)
600 # Component removal is not allowed
601 with self.assertRaises(ValueError):
602 butler.registry.removeDatasetType(DatasetType.nameWithComponent(tmpName, "component"))
604 # Try and fail to delete a datasetType that is associated with data
605 with self.assertRaises(OrphanedRecordError):
606 butler.registry.removeDatasetType(datasetType.name)
608 # Try to delete a RUN collection without purge, or with purge and not
609 # unstore.
610 with self.assertRaises(TypeError):
611 butler.pruneCollection(run1)
612 with self.assertRaises(TypeError):
613 butler.pruneCollection(run2, purge=True)
614 # Add a TAGGED collection and associate ref3 only into it.
615 tag1 = "tag1"
616 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
617 butler.registry.associate(tag1, [ref3])
618 # Add a CHAINED collection that searches run1 and then run2. It
619 # logically contains only ref1, because ref2 is shadowed due to them
620 # having the same data ID and dataset type.
621 chain1 = "chain1"
622 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
623 butler.registry.setCollectionChain(chain1, [run1, run2])
624 # Try to delete RUN collections, which should fail with complete
625 # rollback because they're still referenced by the CHAINED
626 # collection.
627 with self.assertRaises(Exception):
628 butler.pruneCollection(run1, pruge=True, unstore=True)
629 with self.assertRaises(Exception):
630 butler.pruneCollection(run2, pruge=True, unstore=True)
631 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
632 [ref1, ref2, ref3])
633 self.assertTrue(butler.datastore.exists(ref1))
634 self.assertTrue(butler.datastore.exists(ref2))
635 self.assertTrue(butler.datastore.exists(ref3))
636 # Try to delete CHAINED and TAGGED collections with purge; should not
637 # work.
638 with self.assertRaises(TypeError):
639 butler.pruneCollection(tag1, purge=True, unstore=True)
640 with self.assertRaises(TypeError):
641 butler.pruneCollection(chain1, purge=True, unstore=True)
642 # Remove the tagged collection with unstore=False. This should not
643 # affect the datasets.
644 butler.pruneCollection(tag1)
645 with self.assertRaises(MissingCollectionError):
646 butler.registry.getCollectionType(tag1)
647 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
648 [ref1, ref2, ref3])
649 self.assertTrue(butler.datastore.exists(ref1))
650 self.assertTrue(butler.datastore.exists(ref2))
651 self.assertTrue(butler.datastore.exists(ref3))
652 # Add the tagged collection back in, and remove it with unstore=True.
653 # This should remove ref3 only from the datastore.
654 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
655 butler.registry.associate(tag1, [ref3])
656 butler.pruneCollection(tag1, unstore=True)
657 with self.assertRaises(MissingCollectionError):
658 butler.registry.getCollectionType(tag1)
659 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
660 [ref1, ref2, ref3])
661 self.assertTrue(butler.datastore.exists(ref1))
662 self.assertTrue(butler.datastore.exists(ref2))
663 self.assertFalse(butler.datastore.exists(ref3))
664 # Delete the chain with unstore=False. The datasets should not be
665 # affected at all.
666 butler.pruneCollection(chain1)
667 with self.assertRaises(MissingCollectionError):
668 butler.registry.getCollectionType(chain1)
669 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
670 [ref1, ref2, ref3])
671 self.assertTrue(butler.datastore.exists(ref1))
672 self.assertTrue(butler.datastore.exists(ref2))
673 self.assertFalse(butler.datastore.exists(ref3))
674 # Redefine and then delete the chain with unstore=True. Only ref1
675 # should be unstored (ref3 has already been unstored, but otherwise
676 # would be now).
677 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
678 butler.registry.setCollectionChain(chain1, [run1, run2])
679 butler.pruneCollection(chain1, unstore=True)
680 with self.assertRaises(MissingCollectionError):
681 butler.registry.getCollectionType(chain1)
682 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
683 [ref1, ref2, ref3])
684 self.assertFalse(butler.datastore.exists(ref1))
685 self.assertTrue(butler.datastore.exists(ref2))
686 self.assertFalse(butler.datastore.exists(ref3))
687 # Remove run1. This removes ref1 and ref3 from the registry (they're
688 # already gone from the datastore, which is fine).
689 butler.pruneCollection(run1, purge=True, unstore=True)
690 with self.assertRaises(MissingCollectionError):
691 butler.registry.getCollectionType(run1)
692 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
693 [ref2])
694 self.assertTrue(butler.datastore.exists(ref2))
695 # Remove run2. This removes ref2 from the registry and the datastore.
696 butler.pruneCollection(run2, purge=True, unstore=True)
697 with self.assertRaises(MissingCollectionError):
698 butler.registry.getCollectionType(run2)
699 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
700 [])
702 # Now that the collections have been pruned we can remove the
703 # dataset type
704 butler.registry.removeDatasetType(datasetType.name)
706 def testPickle(self):
707 """Test pickle support.
708 """
709 butler = Butler(self.tmpConfigFile, run="ingest")
710 butlerOut = pickle.loads(pickle.dumps(butler))
711 self.assertIsInstance(butlerOut, Butler)
712 self.assertEqual(butlerOut._config, butler._config)
713 self.assertEqual(butlerOut.collections, butler.collections)
714 self.assertEqual(butlerOut.run, butler.run)
    def testGetDatasetTypes(self):
        """Test dataset type registration, component queries, and butler
        configuration validation against registered types.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are not created
        # for components but querying them can return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        # NOTE(review): validationCanFail is presumably a class attribute set
        # by config-specific subclasses — confirm against the subclasses.
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])
761 def testTransaction(self):
762 butler = Butler(self.tmpConfigFile, run="ingest")
763 datasetTypeName = "test_metric"
764 dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
765 dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
766 ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
767 "band": "R"}),
768 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
769 "physical_filter": "d-r"}))
770 storageClass = self.storageClassFactory.getStorageClass("StructuredData")
771 metric = makeExampleMetrics()
772 dataId = {"instrument": "DummyCam", "visit": 42}
773 # Create and register a DatasetType
774 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
775 with self.assertRaises(TransactionTestError):
776 with butler.transaction():
777 # Add needed Dimensions
778 for args in dimensionEntries:
779 butler.registry.insertDimensionData(*args)
780 # Store a dataset
781 ref = butler.put(metric, datasetTypeName, dataId)
782 self.assertIsInstance(ref, DatasetRef)
783 # Test getDirect
784 metricOut = butler.getDirect(ref)
785 self.assertEqual(metric, metricOut)
786 # Test get
787 metricOut = butler.get(datasetTypeName, dataId)
788 self.assertEqual(metric, metricOut)
789 # Check we can get components
790 self.assertGetComponents(butler, ref,
791 ("summary", "data", "output"), metric)
792 raise TransactionTestError("This should roll back the entire transaction")
793 with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
794 butler.registry.expandDataId(dataId)
795 # Should raise LookupError for missing data ID value
796 with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
797 butler.get(datasetTypeName, dataId)
798 # Also check explicitly if Dataset entry is missing
799 self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
800 # Direct retrieval should not find the file in the Datastore
801 with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
802 butler.getDirect(ref)
804 def testMakeRepo(self):
805 """Test that we can write butler configuration to a new repository via
806 the Butler.makeRepo interface and then instantiate a butler from the
807 repo root.
808 """
809 # Do not run the test if we know this datastore configuration does
810 # not support a file system root
811 if self.fullConfigKey is None:
812 return
814 # Remove the file created in setUp
815 os.unlink(self.tmpConfigFile)
817 createRegistry = not self.useTempRoot
818 butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile),
819 createRegistry=createRegistry)
820 limited = Config(self.configFile)
821 butler1 = Butler(butlerConfig)
822 butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
823 config=Config(self.configFile), overwrite=True)
824 full = Config(self.tmpConfigFile)
825 butler2 = Butler(butlerConfig)
826 # Butlers should have the same configuration regardless of whether
827 # defaults were expanded.
828 self.assertEqual(butler1._config, butler2._config)
829 # Config files loaded directly should not be the same.
830 self.assertNotEqual(limited, full)
831 # Make sure "limited" doesn't have a few keys we know it should be
832 # inheriting from defaults.
833 self.assertIn(self.fullConfigKey, full)
834 self.assertNotIn(self.fullConfigKey, limited)
836 # Collections don't appear until something is put in them
837 collections1 = set(butler1.registry.queryCollections())
838 self.assertEqual(collections1, set())
839 self.assertEqual(set(butler2.registry.queryCollections()), collections1)
841 # Check that a config with no associated file name will not
842 # work properly with relocatable Butler repo
843 butlerConfig.configFile = None
844 with self.assertRaises(ValueError):
845 Butler(butlerConfig)
847 with self.assertRaises(FileExistsError):
848 Butler.makeRepo(self.root, standalone=True, createRegistry=False,
849 config=Config(self.configFile), overwrite=False)
851 def testStringification(self):
852 butler = Butler(self.tmpConfigFile, run="ingest")
853 butlerStr = str(butler)
855 if self.datastoreStr is not None:
856 for testStr in self.datastoreStr:
857 self.assertIn(testStr, butlerStr)
858 if self.registryStr is not None:
859 self.assertIn(self.registryStr, butlerStr)
861 datastoreName = butler.datastore.name
862 if self.datastoreName is not None:
863 for testStr in self.datastoreName:
864 self.assertIn(testStr, datastoreName)
class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location.

        Parameters
        ----------
        root : `str` or `ButlerURI`
            Root location to resolve ``relpath`` against; treated as a
            directory.
        relpath : `str`
            Path relative to ``root``.

        Returns
        -------
        exists : `bool`
            `True` if the resource exists at the joined location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        # Exercise the file-template machinery: datasets put with different
        # data IDs must land at the paths the configured templates predict.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore"""
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # NOTE(review): leftover debug print below — consider removing.
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with tempfile.TemporaryDirectory() as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f,
                                        directory=exportDir, transfer="auto", skip_dimensions=None)
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                # Imported dimension records should match what was exported.
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])
class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # The repo must contain at least one dataset before exporting.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [exportButler.getURI(ref).relative_to(datastoreRoot) for ref in datasets]

        # Assume a local file system and check the datastore copies exist.
        for path in pathsInStore:
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        # Each transfer mode should reproduce the datastore layout in the
        # export directory.
        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with tempfile.TemporaryDirectory(dir=TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")
class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # No expanded-config key and no file-system root for an in-memory store.
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # File ingest does not apply to an in-memory datastore, so disable
        # the inherited test.
        pass
class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler: an in-memory datastore
    chained with two Posix datastores.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # Key path indexes into the second child datastore's configuration.
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1/,", "/PosixDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"
class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create a repository in the first directory.
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Relocate the yaml file to a second directory, recording the
        # original repo location under the "root" key.
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        originalConfigPath = os.path.join(self.dir1, "butler.yaml")
        relocated = Config(originalConfigPath)
        relocated["root"] = self.dir1
        relocatedConfigPath = os.path.join(self.dir2, "butler2.yaml")
        relocated.dumpToUri(relocatedConfigPath)
        os.remove(originalConfigPath)
        self.tmpConfigFile = relocatedConfigPath

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        # Only the relocated config should exist...
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        # ...while the registry database stays with the original root.
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Write the repo into root but its configuration file into root2.
        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        config = Config(self.tmpConfigFile)
        # The written config's root must point back at the repo directory.
        actual = ButlerURI(config["root"])
        expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(actual.geturl(), expected.geturl())
        self.assertNotIn(":", actual.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")
class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test makeRepo when the outfile argument is a bare directory."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Point outfile at a directory; makeRepo writes butler.yaml inside.
        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file name, otherwise the Config constructor can
        # not determine the file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()
class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test makeRepo when the outfile argument is given as a URI."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Express the config destination as a file URI rather than a path.
        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # Fixed: this was a plain string, so the {bucketName}/{root} placeholders
    # were never substituted; use an f-string like datastoreStr above.
    # (setUp overwrites the attribute with the real root URI in any case.)
    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            # Delete every object so the bucket itself can be removed.
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()
@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests
# NOTE(review): this relies on ``unittest.mock`` already being importable as
# an attribute of ``unittest``; the file only does ``import unittest``, so an
# explicit ``import unittest.mock`` at the top of the file would be safer —
# TODO confirm.
@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                       "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                           TESTDIR, "config/testConfigs/webdav/token"),
                                       "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
class WebdavDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.

    A local wsgidav/cheroot webdav server is started in a background thread
    once per class and shut down again in tearDownClass.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens. Automatically chosen
    at setUpClass via the _getfreeport() method
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # Fixed: this was a plain string, so the {serverName}/{root} placeholders
    # were never substituted; use an f-string like datastoreStr above.
    # (setUp overwrites the attribute with the real server URI in any case.)
    datastoreName = [f"WebdavDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run"""

    stopWebdavServer = False
    """This flag will cause the webdav server to
    gracefully shut down when True
    """

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as inherited class
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which tests will be run.  _serveWebdav
        # is looked up on the class (plain function), so ``cls`` must be
        # passed explicitly as its first argument.
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start
        time.sleep(3)

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server
        cls.stopWebdavServer = True
        # Wait for the thread to exit
        cls.serverThread.join()

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"WebdavDatastore@{self.rooturi}"]

        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def tearDown(self):
        # Clear temporary directory
        ButlerURI(self.rooturi).remove()
        ButlerURI(self.rooturi).session.close()

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Starts a local webdav-compatible HTTP server,
        listening on http://localhost:<port> where <port> was chosen by
        _getfreeport().  This server only runs while this test class is
        instantiated, and then shuts down.  Must be started in a separate
        thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen
        stopWebdavServer : callable
            Zero-argument callable polled once a second; the server shuts
            down gracefully when it returns `True`.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # watch stopWebdavServer, and gracefully
            # shut down the server when True
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    @staticmethod
    def _getfreeport():
        """
        Determines a free port using sockets.

        Declared a staticmethod (it takes no ``self``/``cls``); the existing
        ``cls._getfreeport()`` call in setUpClass is unaffected.
        """
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(('0.0.0.0', 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
        free_socket.close()
        return port
# Standard script entry point; the coverage-report annotation that was fused
# onto this line in the extracted source has been removed.
if __name__ == "__main__":
    unittest.main()