# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import os
26import posixpath
27import unittest
28import tempfile
29import shutil
30import pickle
31import string
32import random
33import time
34import socket

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported.
        """
        return cls
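
# When moto is unavailable, the fallback mock_s3 above simply returns the
# decorated class unchanged; the @unittest.skipIf(not boto3, ...) guard on
# the S3 test case below is what actually prevents those tests from running.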

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
from threading import Thread
from tempfile import gettempdir

from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core.s3utils import (setAwsEnvCredentials,
                                          unsetAwsEnvCredentials)
from lsst.daf.butler.core.webdavutils import isWebdavEndpoint

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )
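
# For reference: the tests below access the result's .summary, .output and
# .data attributes (e.g. slicing .data, which must be the list above), so the
# positional argument order here - presumably (summary, output, data) -
# matters.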


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by other test
    cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None
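
    # Concrete subclasses are expected to provide at least a ``configFile``
    # class attribute pointing at the butler configuration to exercise; the
    # test-case classes further down also define attributes such as
    # fullConfigKey, validationCanFail, datastoreStr, datastoreName and
    # registryStr that the shared tests consult.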

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # The run and tagged collections should have been registered on
        # construction.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Try to create one that will have a name that is too long
        with self.assertRaises(Exception) as cm:
            self.addDatasetType("DatasetTypeNameTooLong" * 50, dimensions, storageClass, butler.registry)
        self.assertIn("check constraint", str(cm.exception).lower())

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
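                # pruneDatasets with default arguments only disassociates the
                # dataset from the butler's tagged collections; the
                # assertions below check that the registry entry and stored
                # file survive, while the later unstore=True and purge=True
                # calls remove progressively more.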
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.readComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")
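        # transfer="copy" copies the source files into the datastore and
        # leaves the originals in place; other modes exercised in this file
        # (e.g. "link", "symlink", "relsymlink" in testExportTransferCopy)
        # link or reference the files instead.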

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
        # Trying to delete a RUN collection without purge, or with purge but
        # not unstore, should fail.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
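        # A CHAINED collection is an ordered search path: a lookup in chain1
        # tries run1 first and falls back to run2 only for data IDs not found
        # in run1, which is why ref2 is "shadowed" by ref1 above.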
        # Trying to delete the RUN collections should fail with complete
        # rollback because they are still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying can still return them.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")

        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        createRegistry = not self.useTempRoot
        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile),
                                       createRegistry=createRegistry)
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        The testPutTemplates test verifies the actual physical existence of
        the files in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()
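
    # ButlerURI abstracts over local paths and remote URIs alike, so the
    # helper above works unchanged for the posix, S3 and webdav test cases
    # that inherit from this class.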

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temporary directory and an import
        back into a new temporary directory repo.  It does not assume a
        posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets.  We use TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand.  Functions
                # in the script folder are generally considered protected and
                # should not be used as public API.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, output_run="ingest/run", export_file=f,
                                        directory=exportDir, transfer="auto", skip_dimensions=None)
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))


class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using various transfer modes."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with tempfile.TemporaryDirectory(dir=TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # An in-memory datastore cannot ingest files, so skip the inherited
        # test.
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1/,", "/PosixDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of the repo
    works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of the repo works
    when the outfile is given as a directory."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file, else the Config constructor does not know the
        # file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of the repo works
    when the outfile is given as a URI."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests.  The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # Moto needs to know that we expect the bucket to exist
        # (this used to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)
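        # Because the test case class is wrapped in @mock_s3, these boto3
        # calls (and the S3Datastore's own) are intercepted by moto and never
        # reach real AWS; the dummy credentials set above are sufficient.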

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable - pass.
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()


@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, {"WEBDAV_AUTH_METHOD": "TOKEN",
                                       "WEBDAV_BEARER_TOKEN": "XXXXXX"})
class WebdavDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens.  Automatically chosen
    at setUpClass via the _getfreeport() method.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"WebdavDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run."""

    stopWebdavServer = False
    """This flag will cause the webdav server to
    gracefully shut down when True.
    """

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as the inherited class.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which tests will be run.
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start.
        time.sleep(3)
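        # The fixed sleep is only a crude readiness heuristic; setUp() also
        # calls isWebdavEndpoint() before using the server, so a slow start
        # shows up as an explicit error rather than a hang.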

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server.
        cls.stopWebdavServer = True
        # Wait for the thread to exit.
        cls.serverThread.join()

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"WEBDAV_AUTH_METHOD": "TOKEN",
                                           "WEBDAV_BEARER_TOKEN": "XXXXXX"})
    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"WebdavDatastore@{self.rooturi}"]

        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"WEBDAV_AUTH_METHOD": "TOKEN",
                                           "WEBDAV_BEARER_TOKEN": "XXXXXX"})
    def tearDown(self):
        # Clear the temporary directory.
        ButlerURI(self.rooturi).remove()

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Start a local webdav-compatible HTTP server listening on
        http://localhost:<port>.

        This server only runs while this test class is instantiated, and
        then shuts down.  Must be started in a separate thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
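        # provider_mapping exposes root_path as the "/" share; setting
        # domain_controller to None with a "*": True user_mapping in
        # simple_dc permits anonymous access, which is all these tests need.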
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread.
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # Watch stopWebdavServer, and gracefully shut down the server
            # when it becomes True.
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    @staticmethod
    def _getfreeport():
        """Determine a free port using sockets.
        """
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(("0.0.0.0", 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
        # Note: another process could claim this port between close() and the
        # server binding it, but that is acceptable for a test fixture.
        free_socket.close()
        return port


if __name__ == "__main__":
    unittest.main()