# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import os
import posixpath
import unittest
import unittest.mock
import tempfile
import shutil
import pickle
import string
import random
import time
import socket

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 cannot be imported.
        """
        return cls
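
# With moto unavailable the decorator above is a pass-through; the S3 test
# case further below is skipped in that situation anyway via
# ``skipIf(not boto3, ...)``.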

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
from threading import Thread
from tempfile import gettempdir

from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef, DatasetIdGenEnum
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError, ConflictingDefinitionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
                                                     unsetAwsEnvCredentials)
from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))
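

# The three positional arguments to MetricsExample below populate its
# ``summary``, ``output``, and ``data`` attributes, which the put/get tests
# later read back both whole and as individual components.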
def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""
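
    # Maintainer note: concrete subclasses are expected to provide
    # ``configFile`` (consumed by setUpClass) and ``tmpConfigFile`` (used
    # whenever a Butler is constructed in these tests).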

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
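        """Check that every named component can be read both with a direct
        ``butler.get`` of the component dataset type and through a deferred
        handle on the parent ref, and that the results match ``reference``.
        """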
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets are written to the "ingest" run, which is also the
        # collection searched when looking the datasets up again below.
        run = "ingest"
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric,
                                             collections=this_run)

                # Can the artifacts themselves be retrieved?
                if not butler.datastore.isEphemeral:
                    root_uri = ButlerURI(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        transferred = butler.retrieveArtifacts([ref], destination,
                                                               preserve_path=preserve_path)
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ButlerURI.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)

                            # when path is not preserved there should not be
                            # any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(len(artifacts), n_uris,
                                         "Comparing expected artifacts vs actual:"
                                         f" {artifacts} vs {primary_uri} and {secondary_uris}")

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts([ref], destination,
                                                                         preserve_path=preserve_path,
                                                                         overwrite=True)
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know they are
                # empty
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present
        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True
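
    # ``useTempRoot`` is consulted in the setUp/tearDown of the
    # remote-datastore subclasses below (S3, WebDAV): when True they generate
    # a fresh random root per test instead of the fixed ``butlerRoot/``.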

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(
            butler2.collections,
            CollectionSearch.fromExpression(["other"])
        )
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp",
                                                             "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

        # Now that the collections have been pruned we can remove the
        # dataset type
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are not created
        # for components but querying them can return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component", "random_data", "random_data_2"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component", "random_data", "random_data_2"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
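        # Note: the dataset type is registered outside the transaction below,
        # so the rollback is expected to undo only the dimension inserts and
        # the put, which is what the assertions at the end check.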
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        The test testPutTemplates verifies actual physical existence of the
        files in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()
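
    # ``ButlerURI`` abstracts over local paths and remote URI schemes, so
    # this helper also works for the S3 and WebDAV subclasses below, where
    # the datastore root is not a plain filesystem path.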

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix
        datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements
                # even though there aren't any in these datasets or in the
                # database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f, directory=exportDir,
                                        transfer="auto", skip_dimensions=None, reuse_ids=False)
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # put a dataset in each
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"
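
    # These class-level knobs are consumed by the shared test suite above:
    # ``fullConfigKey`` by testMakeRepo, ``validationCanFail`` by
    # testGetDatasetTypes, and ``datastoreStr``/``datastoreName``/
    # ``registryStr`` by testStringification.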

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"

    def testIngest(self):
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo, with the outfile given
    as a directory outside the repo, works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo, with the outfile given
    as a URI outside the repo, works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)
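

# ``mock_s3`` (from moto, when it is importable) patches boto3/botocore so
# that every S3 call made by the decorated test case runs against an
# in-memory backend instead of real AWS; the no-op fallback defined at the
# top of this file keeps the decorator usable when moto is missing.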
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local SQLite registry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests. The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"FileDatastore@s3://{bucketName}/{root}"]
    """The expected format of the S3 Datastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root name for
        the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # need local folder to store registry database
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = [f"datastore={self.root}"]
        self.datastoreName = [f"FileDatastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

        if self.useTempRoot and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
1333@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
1334# Mock required environment variables during tests
1335@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
1336 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
1337 TESTDIR, "config/testConfigs/webdav/token"),
1338 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
1339class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1340 """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
1341 a local in-memory SqlRegistry.
1342 """
1343 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
1344 fullConfigKey = None
1345 validationCanFail = True
1347 serverName = "localhost"
1348 """Name of the server that will be used in the tests.
1349 """
1351 portNumber = 8080
1352 """Port on which the webdav server listens. Automatically chosen
1353 at setUpClass via the _getfreeport() method
1354 """
1356 root = "butlerRoot/"
1357 """Root repository directory expected to be used in case useTempRoot=False.
1358 Otherwise the root is set to a randomly generated 20-character string
1359 during set-up.
1360 """
1362 datastoreStr = [f"datastore={root}"]
1363 """Contains all expected root locations in a format expected to be
1364 returned by Butler stringification.
1365 """
1367 datastoreName = ["FileDatastore@https://{serverName}/{root}"]
1368 """The expected format of the WebdavDatastore string."""
1370 registryStr = "/gen3.sqlite3"
1371 """Expected format of the Registry string."""
1373 serverThread = None
1374 """Thread in which the local webdav server will run."""
1376 stopWebdavServer = False
1377 """When set to True, this flag causes the webdav server to
1378 shut down gracefully.
1379 """
1381 def genRoot(self):
1382 """Returns a random string of len 20 to serve as a root
1383 name for the temporary bucket repo.
1385 This is equivalent to tempfile.mkdtemp as this is what self.root
1386 becomes when useTempRoot is True.
1387 """
1388 rndstr = "".join(
1389 random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
1390 )
1391 return rndstr + "/"
1393 @classmethod
1394 def setUpClass(cls):
1395 # Do the same as the parent class
1396 cls.storageClassFactory = StorageClassFactory()
1397 cls.storageClassFactory.addFromConfig(cls.configFile)
1399 cls.portNumber = cls._getfreeport()
1400 # Run a local webdav server on which tests will be run
1401 cls.serverThread = Thread(target=cls._serveWebdav,
1402 args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
1403 daemon=True)
1404 cls.serverThread.start()
1405 # Wait for it to start
1406 time.sleep(3)
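# The fixed three-second sleep above is simple but can be flaky on slow
# machines. A hedged alternative (wait_for_port is a hypothetical
# helper, not part of daf_butler) polls until the server accepts
# connections:
import socket
import time

def wait_for_port(host, port, timeout=10.0):
    # Retry a TCP connect until it succeeds or the deadline passes.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=1.0):
                return
        except OSError:
            time.sleep(0.1)
    raise RuntimeError(f"server on {host}:{port} did not start within {timeout}s")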
1408 @classmethod
1409 def tearDownClass(cls):
1410 # Ask for graceful shut down of the webdav server
1411 cls.stopWebdavServer = True
1412 # Wait for the thread to exit
1413 cls.serverThread.join()
1415 # Mock required environment variables during tests
1416 @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
1417 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
1418 TESTDIR, "config/testConfigs/webdav/token"),
1419 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
1420 def setUp(self):
1421 config = Config(self.configFile)
1423 if self.useTempRoot:
1424 self.root = self.genRoot()
1425 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
1426 config.update({"datastore": {"datastore": {"root": self.rooturi}}})
1428 # need local folder to store registry database
1429 self.reg_dir = makeTestTempDir(TESTDIR)
1430 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
1432 self.datastoreStr = f"datastore={self.root}"
1433 self.datastoreName = [f"FileDatastore@{self.rooturi}"]
1435 if not isWebdavEndpoint(self.rooturi):
1436 raise OSError("Webdav server not running properly: cannot run tests.")
1438 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
1439 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")
1441 # Mock required environment variables during tests
1442 @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
1443 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
1444 TESTDIR, "config/testConfigs/webdav/token"),
1445 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
1446 def tearDown(self):
1447 # Clear temporary directory
1448 ButlerURI(self.rooturi).remove()
1449 ButlerURI(self.rooturi).session.close()
1451 if self.reg_dir is not None and os.path.exists(self.reg_dir):
1452 shutil.rmtree(self.reg_dir, ignore_errors=True)
1454 if self.useTempRoot and os.path.exists(self.root):
1455 shutil.rmtree(self.root, ignore_errors=True)
1457 def _serveWebdav(self, port: int, stopWebdavServer):
1458 """Starts a local webdav-compatible HTTP server,
1459 Listening on http://localhost:port
1460 This server only runs when this test class is instantiated,
1461 and then shuts down. Must be started is a separate thread.
1463 Parameters
1464 ----------
1465 port : `int`
1466 The port number on which the server should listen
1467 """
1468 root_path = gettempdir()
1470 config = {
1471 "host": "0.0.0.0",
1472 "port": port,
1473 "provider_mapping": {"/": root_path},
1474 "http_authenticator": {
1475 "domain_controller": None
1476 },
1477 "simple_dc": {"user_mapping": {"*": True}},
1478 "verbose": 0,
1479 }
1480 app = WsgiDAVApp(config)
1482 server_args = {
1483 "bind_addr": (config["host"], config["port"]),
1484 "wsgi_app": app,
1485 }
1486 server = wsgi.Server(**server_args)
1487 server.prepare()
1489 try:
1490 # Start the actual server in a separate thread
1491 t = Thread(target=server.serve, daemon=True)
1492 t.start()
1493 # watch stopWebdavServer, and gracefully
1494 # shut down the server when True
1495 while True:
1496 if stopWebdavServer():
1497 break
1498 time.sleep(1)
1499 except KeyboardInterrupt:
1500 print("Caught Ctrl-C, shutting down...")
1501 finally:
1502 server.stop()
1503 t.join()
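# Annotated reading of the wsgidav configuration used above (keys as
# documented by wsgidav; this is a gloss, not new behaviour):
#   provider_mapping {"/": root_path}  -> serve the temp dir at the URL root
#   http_authenticator.domain_controller None with
#   simple_dc.user_mapping {"*": True} -> accept anonymous access
#   verbose 0                          -> silence server logging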
1505 def _getfreeport():
1506 """
1507 Determines a free port using sockets.
1508 """
1509 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1510 free_socket.bind(('0.0.0.0', 0))
1511 free_socket.listen()
1512 port = free_socket.getsockname()[1]
1513 free_socket.close()
1514 return port
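# Note (an observation, not from the original): the socket is closed
# before the webdav server rebinds the port, so another process could
# in principle claim it in between; the tests tolerate this short race.
# A context-managed equivalent of the helper above:
import socket

def get_free_port():
    # Bind to port 0 and let the OS pick an unused port.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("0.0.0.0", 0))
        return s.getsockname()[1]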
1517class PosixDatastoreTransfers(unittest.TestCase):
1518 """Test data transfers between butlers.
1520 Tests cover the different dataset-ID managers: UUID to UUID and
1521 integer to integer are tested. UUID to integer is not supported, since
1522 we do not currently want to allow that. Integer to UUID is supported,
1523 with the caveat that a UUID4 will be generated, which is incorrect for
1524 raw dataset types; the test ignores that. See the sketch after this class.
1525 """
1527 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1529 @classmethod
1530 def setUpClass(cls):
1531 cls.storageClassFactory = StorageClassFactory()
1532 cls.storageClassFactory.addFromConfig(cls.configFile)
1534 def setUp(self):
1535 self.root = makeTestTempDir(TESTDIR)
1536 self.config = Config(self.configFile)
1538 def tearDown(self):
1539 removeTestTempDir(self.root)
1541 def create_butler(self, manager, label):
1542 config = Config(self.configFile)
1543 config["registry", "managers", "datasets"] = manager
1544 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config),
1545 writeable=True)
1547 def create_butlers(self, manager1, manager2):
1548 self.source_butler = self.create_butler(manager1, "1")
1549 self.target_butler = self.create_butler(manager2, "2")
1551 def testTransferUuidToUuid(self):
1552 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1553 "ByDimensionsDatasetRecordStorageManagerUUID",
1554 "lsst.daf.butler.registry.datasets.byDimensions."
1555 "ByDimensionsDatasetRecordStorageManagerUUID",
1556 )
1557 # Setting id_gen_map should have no effect here
1558 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})
1560 def testTransferIntToInt(self):
1561 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1562 "ByDimensionsDatasetRecordStorageManager",
1563 "lsst.daf.butler.registry.datasets.byDimensions."
1564 "ByDimensionsDatasetRecordStorageManager",
1565 )
1566 # Integer dataset IDs only allow the UNIQUE generation mode
1567 self.assertButlerTransfers()
1569 def testTransferIntToUuid(self):
1570 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1571 "ByDimensionsDatasetRecordStorageManager",
1572 "lsst.daf.butler.registry.datasets.byDimensions."
1573 "ByDimensionsDatasetRecordStorageManagerUUID",
1574 )
1575 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})
1577 def assertButlerTransfers(self, id_gen_map=None):
1578 """Test that a run can be transferred to another butler."""
1580 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
1581 datasetTypeName = "random_data"
1583 # Test will create 3 collections and we will want to transfer
1584 # two of those three.
1585 runs = ["run1", "run2", "other"]
1587 # Also want to use two different dataset types to ensure that
1588 # grouping works.
1589 datasetTypeNames = ["random_data", "random_data_2"]
1591 # Create the run collections in the source butler.
1592 for run in runs:
1593 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
1595 # Create dimensions in both butlers (transfer will not create them).
1596 n_exposures = 30
1597 for butler in (self.source_butler, self.target_butler):
1598 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
1599 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
1600 "name": "d-r",
1601 "band": "R"})
1602 butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp",
1603 "id": 1, "full_name": "det1"})
1605 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
1606 for datasetTypeName in datasetTypeNames:
1607 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
1608 butler.registry.registerDatasetType(datasetType)
1610 for i in range(n_exposures):
1611 butler.registry.insertDimensionData("exposure", {"instrument": "DummyCamComp",
1612 "id": i, "obs_id": f"exp{i}",
1613 "physical_filter": "d-r"})
1615 # Write a dataset to an unrelated run -- this ensures that integer
1616 # dataset IDs in the target are rewritten if necessary.
1617 # Not relevant for UUIDs.
1618 run = "distraction"
1619 butler = Butler(butler=self.source_butler, run=run)
1620 butler.put({"unrelated": 5, "dataset": "test"}, datasetTypeName,
1621 exposure=1, detector=1, instrument="DummyCamComp", physical_filter="d-r")
1623 # Write some example metrics to the source
1624 butler = Butler(butler=self.source_butler)
1626 source_refs = []
1627 for i in range(n_exposures):
1628 # Put a third of the datasets into each collection; retain
1629 # only two thirds.
1630 index = i % 3
1631 run = runs[index]
1632 datasetTypeName = datasetTypeNames[i % 2]
1634 metric = {"something": i,
1635 "other": "metric",
1636 "list": [2*x for x in range(i)]}
1637 dataId = {"exposure": i, "detector": 1, "instrument": "DummyCamComp", "physical_filter": "d-r"}
1638 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)
1639 if index < 2:
1640 source_refs.append(ref)
1641 new_metric = butler.get(ref.unresolved(), collections=run)
1642 self.assertEqual(new_metric, metric)
1644 # Now transfer them to the second butler
1645 transferred = self.target_butler.transfer_from(self.source_butler, source_refs,
1646 id_gen_map=id_gen_map)
1647 self.assertEqual(len(transferred), 20)
1649 # Now try to get the same refs from the new butler.
1650 for ref in source_refs:
1651 unresolved_ref = ref.unresolved()
1652 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run)
1653 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run)
1654 self.assertEqual(new_metric, old_metric)
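# The sketch referenced in the class docstring: the bare shape of a
# butler-to-butler transfer with an explicit id_gen_map, as exercised
# by assertButlerTransfers. Repository paths and the dataset type name
# are illustrative.
from lsst.daf.butler import Butler, DatasetIdGenEnum

def transfer_demo():
    source = Butler("repo1", writeable=False)
    target = Butler("repo2", writeable=True)
    # Both registries must already share dimension records and dataset
    # types; transfer_from copies the datasets themselves.
    refs = list(source.registry.queryDatasets("random_data_2", collections=...))
    target.transfer_from(source, refs,
                         id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})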
1657 if __name__ == "__main__":
1658 unittest.main()