# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import os
import posixpath
import unittest
import unittest.mock
import tempfile
import shutil
import pickle
import string
import random
import time
import socket

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 cannot be imported.
        """
        return cls
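
# With this fallback, test classes decorated with @mock_s3 remain importable
# when moto is missing; the S3 tests themselves are skipped separately by the
# skipIf(not boto3, ...) decorator on the test case below.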

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
from threading import Thread
from tempfile import gettempdir

from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError, OrphanedRecordError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
                                                     unsetAwsEnvCredentials)
from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
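    """Return a fresh MetricsExample instance to use as a test payload."""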
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests of ButlerConfig behavior not covered by other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")

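
# For reference, a minimal sketch of the search-path override mechanism
# exercised above (hypothetical paths, not part of the test suite):
#
#     config = ButlerConfig("butler.yaml", searchPaths=["/my/overrides"])
#     value = config[("datastore", "records", "table")]  # from the override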

class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
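        """Check that each named component, retrieved both directly and via a
        deferred handle, matches the corresponding attribute of ``reference``.
        """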
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
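        """Run the standard suite of put/get/prune tests for the given
        storage class and dataset type name, returning the Butler used so
        that callers can make further checks.
        """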
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Constructing the Butler should already have registered the run and
        # tagged collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
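        """Test a butler constructed with no default run or collections,
        passing them explicitly to each call instead.
        """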
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
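        """Composite put/get with a storage class that the datastore should
        never disassemble.
        """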
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
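        """Composite put/get with a storage class that file-based datastores
        disassemble into per-component files.
        """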
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
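        """Ingest externally created files, both one file per dataset and
        multiple datasets sharing a single file.
        """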
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
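        """Exercise pruneCollection on RUN, TAGGED, and CHAINED collections,
        including the failure modes that must roll back cleanly.
        """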
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Add a new dataset type and delete it
        tmpName = "prune_collections_disposable"
        tmpDatasetType = self.addDatasetType(tmpName, dimensions, storageClass,
                                             butler.registry)
        tmpFromRegistry = butler.registry.getDatasetType(tmpName)
        self.assertEqual(tmpDatasetType, tmpFromRegistry)
        butler.registry.removeDatasetType(tmpName)
        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(tmpName)
        # Removing a second time is fine
        butler.registry.removeDatasetType(tmpName)

        # Component removal is not allowed
        with self.assertRaises(ValueError):
            butler.registry.removeDatasetType(DatasetType.nameWithComponent(tmpName, "component"))

        # Try and fail to delete a datasetType that is associated with data
        with self.assertRaises(OrphanedRecordError):
            butler.registry.removeDatasetType(datasetType.name)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

        # Now that the collections have been pruned we can remove the
        # dataset type
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
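        """Test registration and querying of dataset types, including
        component dataset types and configuration validation.
        """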
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for components, but querying can still return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
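        """Test that a failed transaction rolls back registry inserts and
        datastore writes together.
        """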
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        createRegistry = not self.useTempRoot
        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile),
                                       createRegistry=createRegistry)
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
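        """Test that the butler stringification reports the expected
        datastore and registry identifiers for this configuration.
        """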
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at the given path, relative to root.

        The testPutTemplates test verifies the actual physical existence of
        files in the requested locations.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
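        """Test that file templates are applied and validated on put, and
        that a template that cannot produce unique filenames is rejected.
        """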
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Export datasets to a temporary directory and import them back
        into a new temporary directory repo. Does not assume a posix
        datastore."""
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with tempfile.TemporaryDirectory() as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements
                # even though there aren't any in these datasets or in the
                # database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f,
                                        directory=exportDir, transfer="auto", skip_dimensions=None)
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])


class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with tempfile.TemporaryDirectory(dir=TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
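        # File ingest does not apply to an in-memory datastore, so disable
        # the inherited test with a no-op override.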
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1/,", "/PosixDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
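        """Check that the config file ended up in dir2 while the repository
        artifacts stayed in dir1.
        """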
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of the repo
    works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that makeRepo can write its config file to a directory outside
    of the repo."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file name, else the Config constructor does not
        # know the file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that makeRepo can write its config file to a URI outside of the
    repo."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20-character randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp, as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()


@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                       "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                           TESTDIR, "config/testConfigs/webdav/token"),
                                       "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
class WebdavDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens. Automatically chosen
    at setUpClass via the _getfreeport() method.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20-character randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"WebdavDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run."""

    stopWebdavServer = False
    """This flag will cause the webdav server to
    gracefully shut down when True.
    """

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary repo.

        This is equivalent to tempfile.mkdtemp, as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as inherited class
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which tests will be run
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start
        time.sleep(3)

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server
        cls.stopWebdavServer = True
        # Wait for the thread to exit
        cls.serverThread.join()

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"WebdavDatastore@{self.rooturi}"]

        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def tearDown(self):
        # Clear temporary directory
        ButlerURI(self.rooturi).remove()
        ButlerURI(self.rooturi).session.close()

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Start a local webdav-compatible HTTP server,
        listening on the given port on localhost.
        This server only runs while this test class is instantiated,
        and then shuts down. Must be started in a separate thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # Watch stopWebdavServer, and gracefully
            # shut down the server when True
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    @staticmethod
    def _getfreeport():
        """
        Determine a free port using sockets.
        """
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(("0.0.0.0", 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
        free_socket.close()
        return port


if __name__ == "__main__":
    unittest.main()