# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import os
26import posixpath
27import unittest
28import tempfile
29import shutil
30import pickle
31import string
32import random
33import time
34import socket

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported.
        """
        return cls
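
# NOTE: the no-op mock_s3 defined above only keeps the decorator importable
# when moto is missing; the @unittest.skipIf(not boto3, ...) guard on the S3
# test case below is what actually prevents those tests from running.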

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
from threading import Thread
from tempfile import gettempdir

from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
                                                     unsetAwsEnvCredentials)
from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint
from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))
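# TESTDIR is the directory containing this test file; the configuration
# files referenced below (e.g. config/basic/butler.yaml) live beneath it.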


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )
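
# The three positional arguments above populate the MetricsExample
# components exercised throughout these tests; given how the component
# assertions below use them, they correspond to the "summary", "output",
# and "data" components respectively ("data" is the list that the tests
# later subset with the "slice" read parameter).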


class TransactionTestError(Exception):
    """Specific error for testing transactions, used to prevent the
    misdiagnosis that might otherwise occur when a standard exception is
    raised.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests of ButlerConfig behavior that are not covered by the
    other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests from different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            # Component dataset types are named as "parent.component".
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest"
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric,
                                             collections=this_run)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know the run collections
            # are empty.
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler
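
    # runPutGetTest deliberately leaves one dataset behind in the default
    # run and returns the Butler so that the datastore-specific test cases
    # below can make further assertions against the populated repository.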

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(
            butler2.collections,
            CollectionSearch.fromExpression(["other"])
        )
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory datastores cannot
        # ingest files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
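        # Each tuple above names a dimension element followed by one or more
        # record mappings; the loop below unpacks each entry via
        # butler.registry.insertDimensionData(*args).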
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying can still return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root.
        if self.fullConfigKey is None:
            return

        # Create two separate directories.
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them.
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo.
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        The test testPutTemplates verifies the actual physical existence
        of the files in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler.
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as a public API.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f,
                                        directory=exportDir, transfer="auto", skip_dimensions=None)
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we only forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler."""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using various transfer modes."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume a local file system.
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler."""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"

    def testIngest(self):
        # An in-memory datastore cannot ingest files, so disable the
        # inherited ingest test for this configuration.
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler."""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config directory given to makeRepo outside of the repo
    works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file name, else the Config constructor does not
        # know the file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config URI given to makeRepo outside of the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests. The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
    """The expected format of the S3 Datastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root name for
        the temporary bucket repo.

        This plays the same role as tempfile.mkdtemp: it is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable; pass.
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)


@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                       "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                           TESTDIR, "config/testConfigs/webdav/token"),
                                       "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens. Automatically chosen
    at setUpClass via the _getfreeport() method.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["FileDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run."""

    stopWebdavServer = False
    """This flag will cause the webdav server to gracefully shut down when
    set to True.
    """

    def genRoot(self):
        """Return a random 20-character string to serve as a root name for
        the temporary repo.

        This plays the same role as tempfile.mkdtemp: it is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as the inherited class.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which the tests will be run.
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start
        time.sleep(3)
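        # NOTE: the fixed sleep is a crude readiness wait for the server
        # thread; setUp() additionally verifies the endpoint with
        # isWebdavEndpoint() before any test runs.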

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server
        cls.stopWebdavServer = True
        # Wait for the thread to exit
        cls.serverThread.join()

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{self.rooturi}"]

        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def tearDown(self):
        # Clear the temporary directory.
        ButlerURI(self.rooturi).remove()
        ButlerURI(self.rooturi).session.close()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Start a local webdav-compatible HTTP server listening on the
        given port. This server only runs while this test class is
        instantiated, and then shuts down. It must be started in a
        separate thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen.
        stopWebdavServer : callable
            A callable returning `True` when the server should shut down.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
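        # The configuration above serves the local temp directory at "/".
        # Disabling the domain controller together with the "*" user mapping
        # is intended to permit unauthenticated access for these tests.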
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # Watch stopWebdavServer, and gracefully
            # shut down the server when it returns True
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    @staticmethod
    def _getfreeport():
        """Determine a free port using sockets.
        """
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(('0.0.0.0', 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
        free_socket.close()
        # NOTE: the port is free at the time of this check, but in principle
        # another process could claim it before the server binds to it.
        return port


if __name__ == "__main__":
    unittest.main()