Coverage for tests/test_butler.py: 14%
1251 statements
coverage.py v6.5.0, created at 2022-10-26 02:02 -0700
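For reference, a coverage page like this one can be regenerated locally. The snippet below is a minimal sketch, not part of the test module itself: it assumes coverage.py and the daf_butler package are installed, and the "htmlcov" output directory is illustrative.

import unittest

import coverage

# Assumption: measure only the file this report describes.
cov = coverage.Coverage(include=["tests/test_butler.py"])
cov.start()
suite = unittest.defaultTestLoader.discover("tests", pattern="test_butler.py")
unittest.TextTestRunner(verbosity=1).run(suite)
cov.stop()
cov.save()
cov.html_report(directory="htmlcov")  # writes HTML pages similar to this report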
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import gc
26import logging
27import os
28import pathlib
29import pickle
30import posixpath
31import random
32import shutil
33import socket
34import string
35import tempfile
36import time
37import unittest
38from tempfile import gettempdir
39from threading import Thread
41try:
42 import boto3
43 import botocore
44 from moto import mock_s3
45except ImportError:
46 boto3 = None
48 def mock_s3(cls):
49 """A no-op decorator in case moto mock_s3 can not be imported."""
50 return cls
53try:
54 # It's possible but silly to have testing.postgresql installed without
55 # having the postgresql server installed (because then nothing in
56 # testing.postgresql would work), so we use the presence of that module
57 # to test whether we can expect the server to be available.
58 import testing.postgresql
59except ImportError:
60 testing = None
63try:
64 from cheroot import wsgi
65 from wsgidav.wsgidav_app import WsgiDAVApp
66except ImportError:
67 WsgiDAVApp = None
69import astropy.time
70import sqlalchemy
71from lsst.daf.butler import (
72 Butler,
73 ButlerConfig,
74 CollectionType,
75 Config,
76 DatasetIdGenEnum,
77 DatasetRef,
78 DatasetType,
79 FileDataset,
80 FileTemplate,
81 FileTemplateValidationError,
82 StorageClassFactory,
83 ValidationError,
84 script,
85)
86from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
87from lsst.daf.butler.registry import (
88 CollectionError,
89 CollectionTypeError,
90 ConflictingDefinitionError,
91 DataIdValueError,
92 MissingCollectionError,
93)
94from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
95from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
96from lsst.resources import ResourcePath
97from lsst.resources.http import _is_webdav_endpoint
98from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
99from lsst.utils import doImport
100from lsst.utils.introspection import get_full_type_name
102TESTDIR = os.path.abspath(os.path.dirname(__file__))
105def makeExampleMetrics():
106 return MetricsExample(
107 {"AM1": 5.2, "AM2": 30.6},
108 {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
109 [563, 234, 456.7, 752, 8, 9, 27],
110 )
113class TransactionTestError(Exception):
114 """Specific error for testing transactions, to prevent misdiagnosing
115 that might otherwise occur when a standard exception is used.
116 """
118 pass
121class ButlerConfigTests(unittest.TestCase):
122 """Simple tests for ButlerConfig that are not tested in any other test
123 cases."""
125 def testSearchPath(self):
126 configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
127 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
128 config1 = ButlerConfig(configFile)
129 self.assertNotIn("testConfigs", "\n".join(cm.output))
131 overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
132 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
133 config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
134 self.assertIn("testConfigs", "\n".join(cm.output))
136 key = ("datastore", "records", "table")
137 self.assertNotEqual(config1[key], config2[key])
138 self.assertEqual(config2[key], "override_record")
141class ButlerPutGetTests:
142 """Helper method for running a suite of put/get tests from different
143 butler configurations."""
145 root = None
146 default_run = "ingésτ😺"
148 @staticmethod
149 def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
150 """Create a DatasetType and register it"""
151 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
152 registry.registerDatasetType(datasetType)
153 return datasetType
155 @classmethod
156 def setUpClass(cls):
157 cls.storageClassFactory = StorageClassFactory()
158 cls.storageClassFactory.addFromConfig(cls.configFile)
160 def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
161 datasetType = datasetRef.datasetType
162 dataId = datasetRef.dataId
163 deferred = butler.getDirectDeferred(datasetRef)
165 for component in components:
166 compTypeName = datasetType.componentTypeName(component)
167 result = butler.get(compTypeName, dataId, collections=collections)
168 self.assertEqual(result, getattr(reference, component))
169 result_deferred = deferred.get(component=component)
170 self.assertEqual(result_deferred, result)
172 def tearDown(self):
173 removeTestTempDir(self.root)
175 def create_butler(self, run, storageClass, datasetTypeName):
176 butler = Butler(self.tmpConfigFile, run=run)
178 collections = set(butler.registry.queryCollections())
179 self.assertEqual(collections, set([run]))
181 # Create and register a DatasetType
182 dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
184 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
186 # Add needed Dimensions
187 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
188 butler.registry.insertDimensionData(
189 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
190 )
191 butler.registry.insertDimensionData(
192 "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
193 )
194 visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
195 visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
196 butler.registry.insertDimensionData(
197 "visit",
198 {
199 "instrument": "DummyCamComp",
200 "id": 423,
201 "name": "fourtwentythree",
202 "physical_filter": "d-r",
203 "visit_system": 1,
204 "datetime_begin": visit_start,
205 "datetime_end": visit_end,
206 },
207 )
209 # Add more visits for some later tests
210 for visit_id in (424, 425):
211 butler.registry.insertDimensionData(
212 "visit",
213 {
214 "instrument": "DummyCamComp",
215 "id": visit_id,
216 "name": f"fourtwentyfour_{visit_id}",
217 "physical_filter": "d-r",
218 "visit_system": 1,
219 },
220 )
221 return butler, datasetType
223 def runPutGetTest(self, storageClass, datasetTypeName):
224 # New datasets will be added to run and tag, but we will only look in
225 # tag when looking up datasets.
226 run = self.default_run
227 butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
229 # Create and store a dataset
230 metric = makeExampleMetrics()
231 dataId = {"instrument": "DummyCamComp", "visit": 423}
233 # Create a DatasetRef for put
234 refIn = DatasetRef(datasetType, dataId, id=None)
236 # Put with a preexisting id should fail
237 with self.assertRaises(ValueError):
238 butler.put(metric, DatasetRef(datasetType, dataId, id=100))
240 # Put and remove the dataset once as a DatasetRef, once as a dataId,
241 # and once with a DatasetType
243 # Keep track of any collections we add and do not clean up
244 expected_collections = {run}
246 counter = 0
247 for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
248 # Since we are using subTest we can get cascading failures
249 # here with the first attempt failing and the others failing
250 # immediately because the dataset already exists. Work around
251 # this by using a distinct run collection each time
252 counter += 1
253 this_run = f"put_run_{counter}"
254 butler.registry.registerCollection(this_run, type=CollectionType.RUN)
255 expected_collections.update({this_run})
257 with self.subTest(args=args):
258 ref = butler.put(metric, *args, run=this_run)
259 self.assertIsInstance(ref, DatasetRef)
261 # Test getDirect
262 metricOut = butler.getDirect(ref)
263 self.assertEqual(metric, metricOut)
264 # Test get
265 metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
266 self.assertEqual(metric, metricOut)
267 # Test get with a datasetRef
268 metricOut = butler.get(ref, collections=this_run)
269 self.assertEqual(metric, metricOut)
270 # Test getDeferred with dataId
271 metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
272 self.assertEqual(metric, metricOut)
273 # Test getDeferred with a datasetRef
274 metricOut = butler.getDeferred(ref, collections=this_run).get()
275 self.assertEqual(metric, metricOut)
276 # and deferred direct with ref
277 metricOut = butler.getDirectDeferred(ref).get()
278 self.assertEqual(metric, metricOut)
280 # Check we can get components
281 if storageClass.isComposite():
282 self.assertGetComponents(
283 butler, ref, ("summary", "data", "output"), metric, collections=this_run
284 )
286 # Can the artifacts themselves be retrieved?
287 if not butler.datastore.isEphemeral:
288 root_uri = ResourcePath(self.root)
290 for preserve_path in (True, False):
291 destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
292 # Use copy so that we can test that overwrite
293 # protection works (using "auto" for File URIs would
294 # use hard links and subsequent transfer would work
295 # because it knows they are the same file).
296 transferred = butler.retrieveArtifacts(
297 [ref], destination, preserve_path=preserve_path, transfer="copy"
298 )
299 self.assertGreater(len(transferred), 0)
300 artifacts = list(ResourcePath.findFileResources([destination]))
301 self.assertEqual(set(transferred), set(artifacts))
303 for artifact in transferred:
304 path_in_destination = artifact.relative_to(destination)
305 self.assertIsNotNone(path_in_destination)
307 # When the path is not preserved there should not be
308 # any path separators.
309 num_seps = path_in_destination.count("/")
310 if preserve_path:
311 self.assertGreater(num_seps, 0)
312 else:
313 self.assertEqual(num_seps, 0)
315 primary_uri, secondary_uris = butler.datastore.getURIs(ref)
316 n_uris = len(secondary_uris)
317 if primary_uri:
318 n_uris += 1
319 self.assertEqual(
320 len(artifacts),
321 n_uris,
322 "Comparing expected artifacts vs actual:"
323 f" {artifacts} vs {primary_uri} and {secondary_uris}",
324 )
326 if preserve_path:
327 # No need to run these twice
328 with self.assertRaises(ValueError):
329 butler.retrieveArtifacts([ref], destination, transfer="move")
331 with self.assertRaises(FileExistsError):
332 butler.retrieveArtifacts([ref], destination)
334 transferred_again = butler.retrieveArtifacts(
335 [ref], destination, preserve_path=preserve_path, overwrite=True
336 )
337 self.assertEqual(set(transferred_again), set(transferred))
339 # Now remove the dataset completely.
340 butler.pruneDatasets([ref], purge=True, unstore=True)
341 # Lookup with original args should still fail.
342 with self.assertRaises(LookupError):
343 butler.datasetExists(*args, collections=this_run)
344 # getDirect() should still fail.
345 with self.assertRaises(FileNotFoundError):
346 butler.getDirect(ref)
347 # Registry shouldn't be able to find it by dataset_id anymore.
348 self.assertIsNone(butler.registry.getDataset(ref.id))
350 # Do explicit registry removal since we know the run collection
351 # is empty
352 butler.registry.removeCollection(this_run)
353 expected_collections.remove(this_run)
355 # Put the dataset again, since the last thing we did was remove it
356 # and we want to use the default collection.
357 ref = butler.put(metric, refIn)
359 # Get with parameters
360 stop = 4
361 sliced = butler.get(ref, parameters={"slice": slice(stop)})
362 self.assertNotEqual(metric, sliced)
363 self.assertEqual(metric.summary, sliced.summary)
364 self.assertEqual(metric.output, sliced.output)
365 self.assertEqual(metric.data[:stop], sliced.data)
366 # getDeferred with parameters
367 sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
368 self.assertNotEqual(metric, sliced)
369 self.assertEqual(metric.summary, sliced.summary)
370 self.assertEqual(metric.output, sliced.output)
371 self.assertEqual(metric.data[:stop], sliced.data)
372 # getDeferred with deferred parameters
373 sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
374 self.assertNotEqual(metric, sliced)
375 self.assertEqual(metric.summary, sliced.summary)
376 self.assertEqual(metric.output, sliced.output)
377 self.assertEqual(metric.data[:stop], sliced.data)
379 if storageClass.isComposite():
380 # Check that components can be retrieved
381 metricOut = butler.get(ref.datasetType.name, dataId)
382 compNameS = ref.datasetType.componentTypeName("summary")
383 compNameD = ref.datasetType.componentTypeName("data")
384 summary = butler.get(compNameS, dataId)
385 self.assertEqual(summary, metric.summary)
386 data = butler.get(compNameD, dataId)
387 self.assertEqual(data, metric.data)
389 if "counter" in storageClass.derivedComponents:
390 count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
391 self.assertEqual(count, len(data))
393 count = butler.get(
394 ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
395 )
396 self.assertEqual(count, stop)
398 compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
399 summary = butler.getDirect(compRef)
400 self.assertEqual(summary, metric.summary)
402 # Create a Dataset type that has the same name but is inconsistent.
403 inconsistentDatasetType = DatasetType(
404 datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
405 )
407 # Getting with a dataset type that does not match registry fails
408 with self.assertRaises(ValueError):
409 butler.get(inconsistentDatasetType, dataId)
411 # Combining a DatasetRef with a dataId should fail
412 with self.assertRaises(ValueError):
413 butler.get(ref, dataId)
414 # Getting with an explicit ref should fail if the id doesn't match
415 with self.assertRaises(ValueError):
416 butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))
418 # Getting a dataset with unknown parameters should fail
419 with self.assertRaises(KeyError):
420 butler.get(ref, parameters={"unsupported": True})
422 # Check we have a collection
423 collections = set(butler.registry.queryCollections())
424 self.assertEqual(collections, expected_collections)
426 # Clean up to check that we can remove something that may have
427 # already had a component removed
428 butler.pruneDatasets([ref], unstore=True, purge=True)
430 # Check that we can configure a butler to accept a put even
431 # if it already has the dataset in registry.
432 ref = butler.put(metric, refIn)
434 # Repeat put will fail.
435 with self.assertRaises(ConflictingDefinitionError):
436 butler.put(metric, refIn)
438 # Remove the datastore entry.
439 butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)
441 # Put will still fail
442 with self.assertRaises(ConflictingDefinitionError):
443 butler.put(metric, refIn)
445 # Allow the put to succeed
446 butler._allow_put_of_predefined_dataset = True
447 ref2 = butler.put(metric, refIn)
448 self.assertEqual(ref2.id, ref.id)
450 # A second put will still fail but with a different exception
451 # than before.
452 with self.assertRaises(ConflictingDefinitionError):
453 butler.put(metric, refIn)
455 # Reset the flag to avoid confusion
456 butler._allow_put_of_predefined_dataset = False
458 # Leave the dataset in place since some downstream tests require
459 # something to be present
461 return butler
463 def testDeferredCollectionPassing(self):
464 # Construct a butler with no run or collection, but make it writeable.
465 butler = Butler(self.tmpConfigFile, writeable=True)
466 # Create and register a DatasetType
467 dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
468 datasetType = self.addDatasetType(
469 "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
470 )
471 # Add needed Dimensions
472 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
473 butler.registry.insertDimensionData(
474 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
475 )
476 butler.registry.insertDimensionData(
477 "visit",
478 {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
479 )
480 dataId = {"instrument": "DummyCamComp", "visit": 423}
481 # Create dataset.
482 metric = makeExampleMetrics()
483 # Register a new run and put dataset.
484 run = "deferred"
485 self.assertTrue(butler.registry.registerRun(run))
486 # Second time it will be allowed but indicate no-op
487 self.assertFalse(butler.registry.registerRun(run))
488 ref = butler.put(metric, datasetType, dataId, run=run)
489 # Putting with no run should fail with CollectionError.
490 with self.assertRaises(CollectionError):
491 butler.put(metric, datasetType, dataId)
492 # Dataset should exist.
493 self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
494 # We should be able to get the dataset back, but with and without
495 # a deferred dataset handle.
496 self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
497 self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
498 # Trying to find the dataset without any collection raises CollectionError.
499 with self.assertRaises(CollectionError):
500 butler.datasetExists(datasetType, dataId)
501 with self.assertRaises(CollectionError):
502 butler.get(datasetType, dataId)
503 # Associate the dataset with a different collection.
504 butler.registry.registerCollection("tagged")
505 butler.registry.associate("tagged", [ref])
506 # Removing the dataset from the new collection should still leave it
507 # findable in the original collection.
508 butler.pruneDatasets([ref], tags=["tagged"])
509 self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
512class ButlerTests(ButlerPutGetTests):
513 """Tests for Butler."""
515 useTempRoot = True
517 def setUp(self):
518 """Create a new butler root for each test."""
519 self.root = makeTestTempDir(TESTDIR)
520 Butler.makeRepo(self.root, config=Config(self.configFile))
521 self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
523 def testConstructor(self):
524 """Independent test of constructor."""
525 butler = Butler(self.tmpConfigFile, run=self.default_run)
526 self.assertIsInstance(butler, Butler)
528 # Check that butler.yaml is added automatically.
529 if self.tmpConfigFile.endswith(end := "/butler.yaml"):
530 config_dir = self.tmpConfigFile[: -len(end)]
531 butler = Butler(config_dir, run=self.default_run)
532 self.assertIsInstance(butler, Butler)
534 # Even with a ResourcePath.
535 butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
536 self.assertIsInstance(butler, Butler)
538 collections = set(butler.registry.queryCollections())
539 self.assertEqual(collections, {self.default_run})
541 # Check that some special characters can be included in run name.
542 special_run = "u@b.c-A"
543 butler_special = Butler(butler=butler, run=special_run)
544 collections = set(butler_special.registry.queryCollections("*@*"))
545 self.assertEqual(collections, {special_run})
547 butler2 = Butler(butler=butler, collections=["other"])
548 self.assertEqual(butler2.collections, ("other",))
549 self.assertIsNone(butler2.run)
550 self.assertIs(butler.datastore, butler2.datastore)
552 # Test that we can use an environment variable to find this
553 # repository.
554 butler_index = Config()
555 butler_index["label"] = self.tmpConfigFile
556 for suffix in (".yaml", ".json"):
557 # Ensure that the content differs so that we know that
558 # we aren't reusing the cache.
559 bad_label = f"s3://bucket/not_real{suffix}"
560 butler_index["bad_label"] = bad_label
561 with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
562 butler_index.dumpToUri(temp_file)
563 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
564 self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
565 uri = Butler.get_repo_uri("bad_label")
566 self.assertEqual(uri, ResourcePath(bad_label))
567 uri = Butler.get_repo_uri("label")
568 butler = Butler(uri, writeable=False)
569 self.assertIsInstance(butler, Butler)
570 butler = Butler("label", writeable=False)
571 self.assertIsInstance(butler, Butler)
572 with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
573 Butler("not_there", writeable=False)
574 with self.assertRaises(KeyError) as cm:
575 Butler.get_repo_uri("missing")
576 self.assertIn("not known to", str(cm.exception))
577 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
578 with self.assertRaises(FileNotFoundError):
579 Butler.get_repo_uri("label")
580 self.assertEqual(Butler.get_known_repos(), set())
581 with self.assertRaises(KeyError) as cm:
582 # No environment variable set.
583 Butler.get_repo_uri("label")
584 self.assertIn("No repository index defined", str(cm.exception))
585 with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
586 # No aliases registered.
587 Butler("not_there")
588 self.assertEqual(Butler.get_known_repos(), set())
590 def testBasicPutGet(self):
591 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
592 self.runPutGetTest(storageClass, "test_metric")
594 def testCompositePutGetConcrete(self):
596 storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
597 butler = self.runPutGetTest(storageClass, "test_metric")
599 # Should *not* be disassembled
600 datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
601 self.assertEqual(len(datasets), 1)
602 uri, components = butler.getURIs(datasets[0])
603 self.assertIsInstance(uri, ResourcePath)
604 self.assertFalse(components)
605 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
606 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
608 # Predicted dataset
609 dataId = {"instrument": "DummyCamComp", "visit": 424}
610 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
611 self.assertFalse(components)
612 self.assertIsInstance(uri, ResourcePath)
613 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
614 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
616 def testCompositePutGetVirtual(self):
617 storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
618 butler = self.runPutGetTest(storageClass, "test_metric_comp")
620 # Should be disassembled
621 datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
622 self.assertEqual(len(datasets), 1)
623 uri, components = butler.getURIs(datasets[0])
625 if butler.datastore.isEphemeral:
626 # Never disassemble in-memory datastore
627 self.assertIsInstance(uri, ResourcePath)
628 self.assertFalse(components)
629 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
630 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
631 else:
632 self.assertIsNone(uri)
633 self.assertEqual(set(components), set(storageClass.components))
634 for compuri in components.values():
635 self.assertIsInstance(compuri, ResourcePath)
636 self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
637 self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")
639 # Predicted dataset
640 dataId = {"instrument": "DummyCamComp", "visit": 424}
641 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
643 if butler.datastore.isEphemeral:
644 # Never disassembled
645 self.assertIsInstance(uri, ResourcePath)
646 self.assertFalse(components)
647 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
648 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
649 else:
650 self.assertIsNone(uri)
651 self.assertEqual(set(components), set(storageClass.components))
652 for compuri in components.values():
653 self.assertIsInstance(compuri, ResourcePath)
654 self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
655 self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")
657 def testStorageClassOverrideGet(self):
658 """Test storage class conversion on get with override."""
659 storageClass = self.storageClassFactory.getStorageClass("StructuredData")
660 datasetTypeName = "anything"
661 run = self.default_run
663 butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
665 # Create and store a dataset.
666 metric = makeExampleMetrics()
667 dataId = {"instrument": "DummyCamComp", "visit": 423}
669 ref = butler.put(metric, datasetType, dataId)
671 # Return native type.
672 retrieved = butler.get(ref)
673 self.assertEqual(retrieved, metric)
675 # Specify an override.
676 new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
677 model = butler.getDirect(ref, storageClass=new_sc)
678 self.assertNotEqual(type(model), type(retrieved))
679 self.assertIs(type(model), new_sc.pytype)
680 self.assertEqual(retrieved, model)
682 # Defer but override later.
683 deferred = butler.getDirectDeferred(ref)
684 model = deferred.get(storageClass=new_sc)
685 self.assertIs(type(model), new_sc.pytype)
686 self.assertEqual(retrieved, model)
688 # Defer but override up front.
689 deferred = butler.getDirectDeferred(ref, storageClass=new_sc)
690 model = deferred.get()
691 self.assertIs(type(model), new_sc.pytype)
692 self.assertEqual(retrieved, model)
694 # Retrieve a component. Should be a tuple.
695 data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
696 self.assertIs(type(data), tuple)
697 self.assertEqual(data, tuple(retrieved.data))
699 # Parameter on the write storage class should work regardless
700 # of read storage class.
701 data = butler.get(
702 "anything.data",
703 dataId,
704 storageClass="StructuredDataDataTestTuple",
705 parameters={"slice": slice(2, 4)},
706 )
707 self.assertEqual(len(data), 2)
709 # Try a parameter that is known to the read storage class but not
710 # the write storage class.
711 with self.assertRaises(KeyError):
712 butler.get(
713 "anything.data",
714 dataId,
715 storageClass="StructuredDataDataTestTuple",
716 parameters={"xslice": slice(2, 4)},
717 )
719 def testPytypePutCoercion(self):
720 """Test python type coercion on Butler.get and put."""
722 # Store some data with the normal example storage class.
723 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
724 datasetTypeName = "test_metric"
725 butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)
727 dataId = {"instrument": "DummyCamComp", "visit": 423}
729 # Put a dict and this should coerce to a MetricsExample
730 test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
731 metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
732 test_metric = butler.getDirect(metric_ref)
733 self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
734 self.assertEqual(test_metric.summary, test_dict["summary"])
735 self.assertEqual(test_metric.output, test_dict["output"])
737 # Check that the put still works if a DatasetType is given with
738 # a definition matching this python type.
739 registry_type = butler.registry.getDatasetType(datasetTypeName)
740 this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
741 metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
742 self.assertEqual(metric2_ref.datasetType, registry_type)
744 # The get will return the type expected by registry.
745 test_metric2 = butler.getDirect(metric2_ref)
746 self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")
748 # Make a new DatasetRef with the compatible but different DatasetType.
749 # This should now return a dict.
750 new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
751 test_dict2 = butler.getDirect(new_ref)
752 self.assertEqual(get_full_type_name(test_dict2), "dict")
754 # Get it again with the wrong dataset type definition using get()
755 # rather than getDirect(). This should be consistent with getDirect()
756 # behavior and return the type of the DatasetType.
757 test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
758 self.assertEqual(get_full_type_name(test_dict3), "dict")
760 def testIngest(self):
761 butler = Butler(self.tmpConfigFile, run=self.default_run)
763 # Create and register a DatasetType
764 dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])
766 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
767 datasetTypeName = "metric"
769 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
771 # Add needed Dimensions
772 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
773 butler.registry.insertDimensionData(
774 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
775 )
776 for detector in (1, 2):
777 butler.registry.insertDimensionData(
778 "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
779 )
781 butler.registry.insertDimensionData(
782 "visit",
783 {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
784 {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
785 )
787 formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
788 dataRoot = os.path.join(TESTDIR, "data", "basic")
789 datasets = []
790 for detector in (1, 2):
791 detector_name = f"detector_{detector}"
792 metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
793 dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
794 # Create a DatasetRef for ingest
795 refIn = DatasetRef(datasetType, dataId, id=None)
797 datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))
799 butler.ingest(*datasets, transfer="copy")
801 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
802 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}
804 metrics1 = butler.get(datasetTypeName, dataId1)
805 metrics2 = butler.get(datasetTypeName, dataId2)
806 self.assertNotEqual(metrics1, metrics2)
808 # Compare URIs
809 uri1 = butler.getURI(datasetTypeName, dataId1)
810 uri2 = butler.getURI(datasetTypeName, dataId2)
811 self.assertNotEqual(uri1, uri2)
813 # Now do a multi-dataset but single file ingest
814 metricFile = os.path.join(dataRoot, "detectors.yaml")
815 refs = []
816 for detector in (1, 2):
817 detector_name = f"detector_{detector}"
818 dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
819 # Create a DatasetRef for ingest
820 refs.append(DatasetRef(datasetType, dataId, id=None))
822 # Test "move" transfer to ensure that the files themselves
823 # have disappeared following ingest.
824 with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
825 tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")
827 datasets = []
828 datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))
830 butler.ingest(*datasets, transfer="move", record_validation_info=False)
831 self.assertFalse(tempFile.exists())
833 # Check that the datastore recorded no file size.
834 # Not all datastores can support this.
835 try:
836 infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
837 self.assertEqual(infos[0].file_size, -1)
838 except AttributeError:
839 pass
841 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
842 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}
844 multi1 = butler.get(datasetTypeName, dataId1)
845 multi2 = butler.get(datasetTypeName, dataId2)
847 self.assertEqual(multi1, metrics1)
848 self.assertEqual(multi2, metrics2)
850 # Compare URIs
851 uri1 = butler.getURI(datasetTypeName, dataId1)
852 uri2 = butler.getURI(datasetTypeName, dataId2)
853 self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")
855 # Test that removing one does not break the second
856 # This line will issue a warning log message for a ChainedDatastore
857 # that uses an InMemoryDatastore since in-memory can not ingest
858 # files.
859 butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
860 self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
861 self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
862 multi2b = butler.get(datasetTypeName, dataId2)
863 self.assertEqual(multi2, multi2b)
865 def testPruneCollections(self):
866 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
867 butler = Butler(self.tmpConfigFile, writeable=True)
868 # Load registry data with dimensions to hang datasets off of.
869 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
870 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
871 # Add some RUN-type collections.
872 run1 = "run1"
873 butler.registry.registerRun(run1)
874 run2 = "run2"
875 butler.registry.registerRun(run2)
876 # Put some datasets. ref1 and ref2 have the same data ID, and are in
877 # different runs. ref3 has a different data ID.
878 metric = makeExampleMetrics()
879 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
880 datasetType = self.addDatasetType(
881 "prune_collections_test_dataset", dimensions, storageClass, butler.registry
882 )
883 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
884 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
885 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
887 # Try to delete a RUN collection without purge, or with purge and not
888 # unstore.
889 with self.assertRaises(TypeError):
890 butler.pruneCollection(run1)
891 with self.assertRaises(TypeError):
892 butler.pruneCollection(run2, purge=True)
893 # Add a TAGGED collection and associate ref3 only into it.
894 tag1 = "tag1"
895 registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
896 self.assertTrue(registered)
897 # Registering a second time should be allowed.
898 registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
899 self.assertFalse(registered)
900 butler.registry.associate(tag1, [ref3])
901 # Add a CHAINED collection that searches run1 and then run2. It
902 # logically contains only ref1, because ref2 is shadowed due to them
903 # having the same data ID and dataset type.
904 chain1 = "chain1"
905 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
906 butler.registry.setCollectionChain(chain1, [run1, run2])
907 # Try to delete RUN collections, which should fail with complete
908 # rollback because they're still referenced by the CHAINED
909 # collection.
910 with self.assertRaises(sqlalchemy.exc.IntegrityError):
911 butler.pruneCollection(run1, purge=True, unstore=True)
912 with self.assertRaises(sqlalchemy.exc.IntegrityError):
913 butler.pruneCollection(run2, purge=True, unstore=True)
914 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
915 existence = butler.datastore.mexists([ref1, ref2, ref3])
916 self.assertTrue(existence[ref1])
917 self.assertTrue(existence[ref2])
918 self.assertTrue(existence[ref3])
919 # Try to delete CHAINED and TAGGED collections with purge; should not
920 # work.
921 with self.assertRaises(TypeError):
922 butler.pruneCollection(tag1, purge=True, unstore=True)
923 with self.assertRaises(TypeError):
924 butler.pruneCollection(chain1, purge=True, unstore=True)
925 # Remove the tagged collection with unstore=False. This should not
926 # affect the datasets.
927 butler.pruneCollection(tag1)
928 with self.assertRaises(MissingCollectionError):
929 butler.registry.getCollectionType(tag1)
930 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
931 existence = butler.datastore.mexists([ref1, ref2, ref3])
932 self.assertTrue(existence[ref1])
933 self.assertTrue(existence[ref2])
934 self.assertTrue(existence[ref3])
935 # Add the tagged collection back in, and remove it with unstore=True.
936 # This should remove ref3 only from the datastore.
937 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
938 butler.registry.associate(tag1, [ref3])
939 butler.pruneCollection(tag1, unstore=True)
940 with self.assertRaises(MissingCollectionError):
941 butler.registry.getCollectionType(tag1)
942 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
943 existence = butler.datastore.mexists([ref1, ref2, ref3])
944 self.assertTrue(existence[ref1])
945 self.assertTrue(existence[ref2])
946 self.assertFalse(existence[ref3])
947 # Delete the chain with unstore=False. The datasets should not be
948 # affected at all.
949 butler.pruneCollection(chain1)
950 with self.assertRaises(MissingCollectionError):
951 butler.registry.getCollectionType(chain1)
952 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
953 existence = butler.datastore.mexists([ref1, ref2, ref3])
954 self.assertTrue(existence[ref1])
955 self.assertTrue(existence[ref2])
956 self.assertFalse(existence[ref3])
957 # Redefine and then delete the chain with unstore=True. Only ref1
958 # should be unstored (ref3 has already been unstored, but otherwise
959 # would be now).
960 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
961 butler.registry.setCollectionChain(chain1, [run1, run2])
962 butler.pruneCollection(chain1, unstore=True)
963 with self.assertRaises(MissingCollectionError):
964 butler.registry.getCollectionType(chain1)
965 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
966 existence = butler.datastore.mexists([ref1, ref2, ref3])
967 self.assertFalse(existence[ref1])
968 self.assertTrue(existence[ref2])
969 self.assertFalse(existence[ref3])
970 # Remove run1. This removes ref1 and ref3 from the registry (they're
971 # already gone from the datastore, which is fine).
972 butler.pruneCollection(run1, purge=True, unstore=True)
973 with self.assertRaises(MissingCollectionError):
974 butler.registry.getCollectionType(run1)
975 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
976 self.assertTrue(butler.datastore.exists(ref2))
977 # Remove run2. This removes ref2 from the registry and the datastore.
978 butler.pruneCollection(run2, purge=True, unstore=True)
979 with self.assertRaises(MissingCollectionError):
980 butler.registry.getCollectionType(run2)
981 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])
983 # Now that the collections have been pruned we can remove the
984 # dataset type
985 butler.registry.removeDatasetType(datasetType.name)
987 def testPickle(self):
988 """Test pickle support."""
989 butler = Butler(self.tmpConfigFile, run=self.default_run)
990 butlerOut = pickle.loads(pickle.dumps(butler))
991 self.assertIsInstance(butlerOut, Butler)
992 self.assertEqual(butlerOut._config, butler._config)
993 self.assertEqual(butlerOut.collections, butler.collections)
994 self.assertEqual(butlerOut.run, butler.run)
996 def testGetDatasetTypes(self):
997 butler = Butler(self.tmpConfigFile, run=self.default_run)
998 dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
999 dimensionEntries = [
1000 (
1001 "instrument",
1002 {"instrument": "DummyCam"},
1003 {"instrument": "DummyHSC"},
1004 {"instrument": "DummyCamComp"},
1005 ),
1006 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
1007 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
1008 ]
1009 storageClass = self.storageClassFactory.getStorageClass("StructuredData")
1010 # Add needed Dimensions
1011 for args in dimensionEntries:
1012 butler.registry.insertDimensionData(*args)
1014 # When a DatasetType is added to the registry entries are not created
1015 # for components but querying them can return the components.
1016 datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
1017 components = set()
1018 for datasetTypeName in datasetTypeNames:
1019 # Create and register a DatasetType
1020 self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
1022 for componentName in storageClass.components:
1023 components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))
1025 fromRegistry: set[DatasetType] = set()
1026 for parent_dataset_type in butler.registry.queryDatasetTypes():
1027 fromRegistry.add(parent_dataset_type)
1028 fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
1029 self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)
1031 # Now that we have some dataset types registered, validate them
1032 butler.validateConfiguration(
1033 ignore=[
1034 "test_metric_comp",
1035 "metric3",
1036 "metric5",
1037 "calexp",
1038 "DummySC",
1039 "datasetType.component",
1040 "random_data",
1041 "random_data_2",
1042 ]
1043 )
1045 # Add a new datasetType that will fail template validation
1046 self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
1047 if self.validationCanFail:
1048 with self.assertRaises(ValidationError):
1049 butler.validateConfiguration()
1051 # Rerun validation but with a subset of dataset type names
1052 butler.validateConfiguration(datasetTypeNames=["metric4"])
1054 # Rerun validation but ignore the bad datasetType
1055 butler.validateConfiguration(
1056 ignore=[
1057 "test_metric_comp",
1058 "metric3",
1059 "metric5",
1060 "calexp",
1061 "DummySC",
1062 "datasetType.component",
1063 "random_data",
1064 "random_data_2",
1065 ]
1066 )
1068 def testTransaction(self):
1069 butler = Butler(self.tmpConfigFile, run=self.default_run)
1070 datasetTypeName = "test_metric"
1071 dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
1072 dimensionEntries = (
1073 ("instrument", {"instrument": "DummyCam"}),
1074 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
1075 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
1076 )
1077 storageClass = self.storageClassFactory.getStorageClass("StructuredData")
1078 metric = makeExampleMetrics()
1079 dataId = {"instrument": "DummyCam", "visit": 42}
1080 # Create and register a DatasetType
1081 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
1082 with self.assertRaises(TransactionTestError):
1083 with butler.transaction():
1084 # Add needed Dimensions
1085 for args in dimensionEntries:
1086 butler.registry.insertDimensionData(*args)
1087 # Store a dataset
1088 ref = butler.put(metric, datasetTypeName, dataId)
1089 self.assertIsInstance(ref, DatasetRef)
1090 # Test getDirect
1091 metricOut = butler.getDirect(ref)
1092 self.assertEqual(metric, metricOut)
1093 # Test get
1094 metricOut = butler.get(datasetTypeName, dataId)
1095 self.assertEqual(metric, metricOut)
1096 # Check we can get components
1097 self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
1098 raise TransactionTestError("This should roll back the entire transaction")
1099 with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
1100 butler.registry.expandDataId(dataId)
1101 # Should raise LookupError for missing data ID value
1102 with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
1103 butler.get(datasetTypeName, dataId)
1104 # Also check explicitly if Dataset entry is missing
1105 self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
1106 # Direct retrieval should not find the file in the Datastore
1107 with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
1108 butler.getDirect(ref)
1110 def testMakeRepo(self):
1111 """Test that we can write butler configuration to a new repository via
1112 the Butler.makeRepo interface and then instantiate a butler from the
1113 repo root.
1114 """
1115 # Do not run the test if we know this datastore configuration does
1116 # not support a file system root
1117 if self.fullConfigKey is None:
1118 return
1120 # create two separate directories
1121 root1 = tempfile.mkdtemp(dir=self.root)
1122 root2 = tempfile.mkdtemp(dir=self.root)
1124 butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
1125 limited = Config(self.configFile)
1126 butler1 = Butler(butlerConfig)
1127 butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
1128 full = Config(self.tmpConfigFile)
1129 butler2 = Butler(butlerConfig)
1130 # Butlers should have the same configuration regardless of whether
1131 # defaults were expanded.
1132 self.assertEqual(butler1._config, butler2._config)
1133 # Config files loaded directly should not be the same.
1134 self.assertNotEqual(limited, full)
1135 # Make sure "limited" doesn't have a few keys we know it should be
1136 # inheriting from defaults.
1137 self.assertIn(self.fullConfigKey, full)
1138 self.assertNotIn(self.fullConfigKey, limited)
1140 # Collections don't appear until something is put in them
1141 collections1 = set(butler1.registry.queryCollections())
1142 self.assertEqual(collections1, set())
1143 self.assertEqual(set(butler2.registry.queryCollections()), collections1)
1145 # Check that a config with no associated file name will not
1146 # work properly with relocatable Butler repo
1147 butlerConfig.configFile = None
1148 with self.assertRaises(ValueError):
1149 Butler(butlerConfig)
1151 with self.assertRaises(FileExistsError):
1152 Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)
1154 def testStringification(self):
1155 butler = Butler(self.tmpConfigFile, run=self.default_run)
1156 butlerStr = str(butler)
1158 if self.datastoreStr is not None:
1159 for testStr in self.datastoreStr:
1160 self.assertIn(testStr, butlerStr)
1161 if self.registryStr is not None:
1162 self.assertIn(self.registryStr, butlerStr)
1164 datastoreName = butler.datastore.name
1165 if self.datastoreName is not None:
1166 for testStr in self.datastoreName:
1167 self.assertIn(testStr, datastoreName)
1169 def testButlerRewriteDataId(self):
1170 """Test that dataIds can be rewritten based on dimension records."""
1172 butler = Butler(self.tmpConfigFile, run=self.default_run)
1174 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
1175 datasetTypeName = "random_data"
1177 # Create dimension records.
1178 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
1179 butler.registry.insertDimensionData(
1180 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
1181 )
1182 butler.registry.insertDimensionData(
1183 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
1184 )
1186 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
1187 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
1188 butler.registry.registerDatasetType(datasetType)
1190 n_exposures = 5
1191 dayobs = 20210530
1193 for i in range(n_exposures):
1194 butler.registry.insertDimensionData(
1195 "exposure",
1196 {
1197 "instrument": "DummyCamComp",
1198 "id": i,
1199 "obs_id": f"exp{i}",
1200 "seq_num": i,
1201 "day_obs": dayobs,
1202 "physical_filter": "d-r",
1203 },
1204 )
1206 # Write some data.
1207 for i in range(n_exposures):
1208 metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}
1210 # Use the seq_num for the put to test rewriting.
1211 dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
1212 ref = butler.put(metric, datasetTypeName, dataId=dataId)
1214 # Check that the exposure is correct in the dataId
1215 self.assertEqual(ref.dataId["exposure"], i)
1217 # and check that we can get the dataset back with the same dataId
1218 new_metric = butler.get(datasetTypeName, dataId=dataId)
1219 self.assertEqual(new_metric, metric)
1222class FileDatastoreButlerTests(ButlerTests):
1223 """Common tests and specialization of ButlerTests for butlers backed
1224 by datastores that inherit from FileDatastore.
1225 """
1227 def checkFileExists(self, root, relpath):
1228 """Checks if file exists at a given path (relative to root).
1230 Test testPutTemplates verifies the actual physical existence of the files
1231 in the requested location.
1232 """
1233 uri = ResourcePath(root, forceDirectory=True)
1234 return uri.join(relpath).exists()
1236 def testPutTemplates(self):
1237 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1238 butler = Butler(self.tmpConfigFile, run=self.default_run)
1240 # Add needed Dimensions
1241 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
1242 butler.registry.insertDimensionData(
1243 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
1244 )
1245 butler.registry.insertDimensionData(
1246 "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
1247 )
1248 butler.registry.insertDimensionData(
1249 "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
1250 )
1252 # Create and store a dataset
1253 metric = makeExampleMetrics()
1255 # Create two almost-identical DatasetTypes (both will use default
1256 # template)
1257 dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
1258 butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
1259 butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
1260 butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))
1262 dataId1 = {"instrument": "DummyCamComp", "visit": 423}
1263 dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
1265 # Put with exactly the data ID keys needed
1266 ref = butler.put(metric, "metric1", dataId1)
1267 uri = butler.getURI(ref)
1268 self.assertTrue(
1269 self.checkFileExists(
1270 butler.datastore.root, f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle"
1271 ),
1272 f"Checking existence of {uri}",
1273 )
1275 # Check the template based on dimensions
1276 butler.datastore.templates.validateTemplates([ref])
1278 # Put with extra data ID keys (physical_filter is an optional
1279 # dependency); should not change template (at least the way we're
1280 # defining them to behave now; the important thing is that they
1281 # must be consistent).
1282 ref = butler.put(metric, "metric2", dataId2)
1283 uri = butler.getURI(ref)
1284 self.assertTrue(
1285 self.checkFileExists(
1286 butler.datastore.root, f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle"
1287 ),
1288 f"Checking existence of {uri}",
1289 )
1291 # Check the template based on dimensions
1292 butler.datastore.templates.validateTemplates([ref])
1294 # Use a template that has a typo in dimension record metadata.
1295 # Easier to test with a butler that has a ref with records attached.
1296 template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
1297 with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
1298 path = template.format(ref)
1299 self.assertEqual(path, f"a/v423/{ref.id}_fits")
1301 template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
1302 with self.assertRaises(KeyError):
1303 with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
1304 template.format(ref)
1306 # Now use a file template that will not result in unique filenames
1307 with self.assertRaises(FileTemplateValidationError):
1308 butler.put(metric, "metric3", dataId1)
1310 def testImportExport(self):
1311 # Run put/get tests just to create and populate a repo.
1312 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1313 self.runImportExportTest(storageClass)
1315 @unittest.expectedFailure
1316 def testImportExportVirtualComposite(self):
1317 # Run put/get tests just to create and populate a repo.
1318 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
1319 self.runImportExportTest(storageClass)
1321 def runImportExportTest(self, storageClass):
1322 """This test does an export to a temp directory and an import back
1323 into a new temp directory repo. It does not assume a posix datastore."""
1324 exportButler = self.runPutGetTest(storageClass, "test_metric")
1325 print("Root:", exportButler.datastore.root)
1326 # Test that the repo actually has at least one dataset.
1327 datasets = list(exportButler.registry.queryDatasets(..., collections=...))
1328 self.assertGreater(len(datasets), 0)
1329 # Add a DimensionRecord that's unused by those datasets.
1330 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
1331 exportButler.registry.insertDimensionData("skymap", skymapRecord)
1332 # Export and then import datasets.
1333 with safeTestTempDir(TESTDIR) as exportDir:
1334 exportFile = os.path.join(exportDir, "exports.yaml")
1335 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
1336 export.saveDatasets(datasets)
1337 # Export the same datasets again. This should quietly do
1338 # nothing because of internal deduplication, and it shouldn't
1339 # complain about being asked to export the "htm7" elements even
1340 # though there aren't any in these datasets or in the database.
1341 export.saveDatasets(datasets, elements=["htm7"])
1342 # Save one of the data IDs again; this should be harmless
1343 # because of internal deduplication.
1344 export.saveDataIds([datasets[0].dataId])
1345 # Save some dimension records directly.
1346 export.saveDimensionData("skymap", [skymapRecord])
1347 self.assertTrue(os.path.exists(exportFile))
1348 with safeTestTempDir(TESTDIR) as importDir:
1349 # We always want this to be a local posix butler
1350 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
1351 # Calling script.butlerImport tests the implementation of the
1352 # butler command line interface "import" subcommand. Functions
1353 # in the script folder are generally considered protected and
1354 # should not be used as public api.
1355 with open(exportFile, "r") as f:
1356 script.butlerImport(
1357 importDir,
1358 export_file=f,
1359 directory=exportDir,
1360 transfer="auto",
1361 skip_dimensions=None,
1362 reuse_ids=False,
1363 )
1364 importButler = Butler(importDir, run=self.default_run)
1365 for ref in datasets:
1366 with self.subTest(ref=ref):
1367 # Test for existence by passing in the DatasetType and
1368 # data ID separately, to avoid lookup by dataset_id.
1369 self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
1370 self.assertEqual(
1371 list(importButler.registry.queryDimensionRecords("skymap")),
1372 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
1373 )
1375 def testRemoveRuns(self):
1376 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1377 butler = Butler(self.tmpConfigFile, writeable=True)
1378 # Load registry data with dimensions to hang datasets off of.
1379 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
1380 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
1382 # Add some RUN-type collections.
1382 run1 = "run1"
1383 butler.registry.registerRun(run1)
1384 run2 = "run2"
1385 butler.registry.registerRun(run2)
1386        # Put a dataset in each run.
1387 metric = makeExampleMetrics()
1388 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
1389 datasetType = self.addDatasetType(
1390 "prune_collections_test_dataset", dimensions, storageClass, butler.registry
1391 )
1392 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
1393 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
1394 uri1 = butler.getURI(ref1, collections=[run1])
1395 uri2 = butler.getURI(ref2, collections=[run2])
1396 # Remove from both runs with different values for unstore.
1397 butler.removeRuns([run1], unstore=True)
1398 butler.removeRuns([run2], unstore=False)
1399 # Should be nothing in registry for either one, and datastore should
1400 # not think either exists.
1401 with self.assertRaises(MissingCollectionError):
1402 butler.registry.getCollectionType(run1)
1403 with self.assertRaises(MissingCollectionError):
1404 butler.registry.getCollectionType(run2)
1405 self.assertFalse(butler.datastore.exists(ref1))
1406 self.assertFalse(butler.datastore.exists(ref2))
1407 # The ref we unstored should be gone according to the URI, but the
1408 # one we forgot should still be around.
1409 self.assertFalse(uri1.exists())
1410 self.assertTrue(uri2.exists())
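    # Illustrative sketch of the removeRuns semantics checked above: both
    # calls drop the RUN collections from the registry, but only
    # unstore=True deletes the underlying file artifacts.
    def _sketchRemoveRuns(self, butler, run_unstored, run_forgotten):
        butler.removeRuns([run_unstored], unstore=True)  # artifacts are deleted
        butler.removeRuns([run_forgotten], unstore=False)  # artifacts remain on disk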
1413class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1414 """PosixDatastore specialization of a butler"""
1416 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1417 fullConfigKey = ".datastore.formatters"
1418 validationCanFail = True
1419 datastoreStr = ["/tmp"]
1420 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1421 registryStr = "/gen3.sqlite3"
1423 def testPathConstructor(self):
1424 """Independent test of constructor using PathLike."""
1425 butler = Butler(self.tmpConfigFile, run=self.default_run)
1426 self.assertIsInstance(butler, Butler)
1428 # And again with a Path object with the butler yaml
1429 path = pathlib.Path(self.tmpConfigFile)
1430 butler = Butler(path, writeable=False)
1431 self.assertIsInstance(butler, Butler)
1433 # And again with a Path object without the butler yaml
1434 # (making sure we skip it if the tmp config doesn't end
1435 # in butler.yaml -- which is the case for a subclass)
1436 if self.tmpConfigFile.endswith("butler.yaml"):
1437 path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
1438 butler = Butler(path, writeable=False)
1439 self.assertIsInstance(butler, Butler)
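    # Sketch (illustrative only): the Butler constructor accepts a plain
    # string, a ResourcePath, or an os.PathLike such as pathlib.Path; the
    # repo location below is a hypothetical placeholder.
    def _sketchPathLikeConstructor(self):
        return Butler(pathlib.Path("/path/to/repo/butler.yaml"), writeable=False)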
1441 def testExportTransferCopy(self):
1442        """Test local export using several transfer modes."""
1443 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1444 exportButler = self.runPutGetTest(storageClass, "test_metric")
1445 # Test that the repo actually has at least one dataset.
1446 datasets = list(exportButler.registry.queryDatasets(..., collections=...))
1447 self.assertGreater(len(datasets), 0)
1448 uris = [exportButler.getURI(d) for d in datasets]
1449 datastoreRoot = exportButler.datastore.root
1451 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]
1453 for path in pathsInStore:
1454 # Assume local file system
1455 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}")
1457 for transfer in ("copy", "link", "symlink", "relsymlink"):
1458 with safeTestTempDir(TESTDIR) as exportDir:
1459 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export:
1460 export.saveDatasets(datasets)
1461 for path in pathsInStore:
1462 self.assertTrue(
1463 self.checkFileExists(exportDir, path),
1464 f"Check that mode {transfer} exported files",
1465 )
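    # Sketch of a single export with an explicit transfer mode, as exercised
    # in the loop above; the destination directory is hypothetical.
    def _sketchExportWithTransfer(self, butler, refs):
        with butler.export(directory="/tmp/export_copy", format="yaml", transfer="copy") as export:
            export.saveDatasets(refs)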
1467 def testPruneDatasets(self):
1468 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1469 butler = Butler(self.tmpConfigFile, writeable=True)
1470 # Load registry data with dimensions to hang datasets off of.
1471 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry"))
1472 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
1473 # Add some RUN-type collections.
1474 run1 = "run1"
1475 butler.registry.registerRun(run1)
1476 run2 = "run2"
1477 butler.registry.registerRun(run2)
1478 # put some datasets. ref1 and ref2 have the same data ID, and are in
1479 # different runs. ref3 has a different data ID.
1480 metric = makeExampleMetrics()
1481 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
1482 datasetType = self.addDatasetType(
1483 "prune_collections_test_dataset", dimensions, storageClass, butler.registry
1484 )
1485 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
1486 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
1487 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
1489 # Simple prune.
1490 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
1491 with self.assertRaises(LookupError):
1492 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1)
1494 # Put data back.
1495 ref1 = butler.put(metric, ref1.unresolved(), run=run1)
1496 ref2 = butler.put(metric, ref2.unresolved(), run=run2)
1497 ref3 = butler.put(metric, ref3.unresolved(), run=run1)
1499        # Check that in normal mode, deleting the datastore record means
1500        # that trashing and emptying the trash will not touch the file.
1501 uri1 = butler.datastore.getURI(ref1)
1502 butler.datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table
1503 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id})
1504 butler.datastore.trash(ref1)
1505 butler.datastore.emptyTrash()
1506 self.assertTrue(uri1.exists())
1507 uri1.remove() # Clean it up.
1509 # Simulate execution butler setup by deleting the datastore
1510 # record but keeping the file around and trusting.
1511 butler.datastore.trustGetRequest = True
1512 uri2 = butler.datastore.getURI(ref2)
1513 uri3 = butler.datastore.getURI(ref3)
1514 self.assertTrue(uri2.exists())
1515 self.assertTrue(uri3.exists())
1517 # Remove the datastore record.
1518 butler.datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table
1519 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id})
1520 self.assertTrue(uri2.exists())
1521 butler.datastore.trash([ref2, ref3])
1522        # Immediate removal of the ref2 file since its record is gone.
1523 self.assertFalse(uri2.exists())
1524        # But ref3 has to wait for the trash to be emptied.
1525 self.assertTrue(uri3.exists())
1526 butler.datastore.emptyTrash()
1527 self.assertFalse(uri3.exists())
1529 # Clear out the datasets from registry.
1530 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
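    # Sketch of the two-stage deletion exercised above: trash() marks the
    # artifacts and emptyTrash() removes them; when the datastore record has
    # already been deleted and trustGetRequest is enabled, the file is
    # removed as soon as it is trashed.
    def _sketchTrashCycle(self, butler, ref):
        butler.datastore.trash(ref)
        butler.datastore.emptyTrash()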
1532 def testPytypeCoercion(self):
1533 """Test python type coercion on Butler.get and put."""
1535 # Store some data with the normal example storage class.
1536 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1537 datasetTypeName = "test_metric"
1538 butler = self.runPutGetTest(storageClass, datasetTypeName)
1540 dataId = {"instrument": "DummyCamComp", "visit": 423}
1541 metric = butler.get(datasetTypeName, dataId=dataId)
1542 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample")
1544 datasetType_ori = butler.registry.getDatasetType(datasetTypeName)
1545 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents")
1547 # Now need to hack the registry dataset type definition.
1548 # There is no API for this.
1549 manager = butler.registry._managers.datasets
1550 manager._db.update(
1551 manager._static.dataset_type,
1552 {"name": datasetTypeName},
1553 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"},
1554 )
1556 # Force reset of dataset type cache
1557 butler.registry.refresh()
1559 datasetType_new = butler.registry.getDatasetType(datasetTypeName)
1560 self.assertEqual(datasetType_new.name, datasetType_ori.name)
1561 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel")
1563 metric_model = butler.get(datasetTypeName, dataId=dataId)
1564 self.assertNotEqual(type(metric_model), type(metric))
1565 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel")
1567 # Put the model and read it back to show that everything now
1568 # works as normal.
1569 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424)
1570 metric_model_new = butler.get(metric_ref)
1571 self.assertEqual(metric_model_new, metric_model)
1573 # Hack the storage class again to something that will fail on the
1574 # get with no conversion class.
1575 manager._db.update(
1576 manager._static.dataset_type,
1577 {"name": datasetTypeName},
1578 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"},
1579 )
1580 butler.registry.refresh()
1582 with self.assertRaises(ValueError):
1583 butler.get(datasetTypeName, dataId=dataId)
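    # Sketch of the coercion behaviour tested above: get() returns an object
    # matching the python type of the storage class currently registered for
    # the dataset type, converting from the stored type where possible.
    def _sketchCoercedGet(self, butler, datasetTypeName, dataId):
        datasetType = butler.registry.getDatasetType(datasetTypeName)
        obj = butler.get(datasetTypeName, dataId=dataId)
        assert isinstance(obj, datasetType.storageClass.pytype)
        return obj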
1586@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
1587class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1588 """PosixDatastore specialization of a butler using Postgres"""
1590 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1591 fullConfigKey = ".datastore.formatters"
1592 validationCanFail = True
1593 datastoreStr = ["/tmp"]
1594 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1595 registryStr = "PostgreSQL@test"
1597 @staticmethod
1598 def _handler(postgresql):
1599 engine = sqlalchemy.engine.create_engine(postgresql.url())
1600 with engine.begin() as connection:
1601 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))
1603 @classmethod
1604 def setUpClass(cls):
1605 # Create the postgres test server.
1606 cls.postgresql = testing.postgresql.PostgresqlFactory(
1607 cache_initialized_db=True, on_initialized=cls._handler
1608 )
1609 super().setUpClass()
1611 @classmethod
1612 def tearDownClass(cls):
1613 # Clean up any lingering SQLAlchemy engines/connections
1614 # so they're closed before we shut down the server.
1615 gc.collect()
1616 cls.postgresql.clear_cache()
1617 super().tearDownClass()
1619 def setUp(self):
1620 self.server = self.postgresql()
1622 # Need to add a registry section to the config.
1623 self._temp_config = False
1624 config = Config(self.configFile)
1625 config["registry", "db"] = self.server.url()
1626 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh:
1627 config.dump(fh)
1628 self.configFile = fh.name
1629 self._temp_config = True
1630 super().setUp()
1632 def tearDown(self):
1633 self.server.stop()
1634 if self._temp_config and os.path.exists(self.configFile):
1635 os.remove(self.configFile)
1636 super().tearDown()
1638 def testMakeRepo(self):
1639        # The base class test assumes that it's using sqlite and that
1640        # the config file is acceptable to sqlite.
1641 raise unittest.SkipTest("Postgres config is not compatible with this test.")
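    # Sketch of the registry override used in setUp(): take the standard
    # butler config and point "registry.db" at the temporary Postgres
    # server created by testing.postgresql.
    def _sketchPostgresRegistryConfig(self, server):
        config = Config(self.configFile)
        config["registry", "db"] = server.url()
        return config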
1644class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
1645 """InMemoryDatastore specialization of a butler"""
1647 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
1648 fullConfigKey = None
1649 useTempRoot = False
1650 validationCanFail = False
1651 datastoreStr = ["datastore='InMemory"]
1652 datastoreName = ["InMemoryDatastore@"]
1653 registryStr = "/gen3.sqlite3"
1655 def testIngest(self):
1656 pass
1659class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
1660 """PosixDatastore specialization"""
1662 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
1663 fullConfigKey = ".datastore.datastores.1.formatters"
1664 validationCanFail = True
1665 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
1666 datastoreName = [
1667 "InMemoryDatastore@",
1668 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
1669 "SecondDatastore",
1670 ]
1671 registryStr = "/gen3.sqlite3"
1674class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
1675 """Test that a yaml file in one location can refer to a root in another."""
1677 datastoreStr = ["dir1"]
1678 # Disable the makeRepo test since we are deliberately not using
1679 # butler.yaml as the config name.
1680 fullConfigKey = None
1682 def setUp(self):
1683 self.root = makeTestTempDir(TESTDIR)
1685 # Make a new repository in one place
1686 self.dir1 = os.path.join(self.root, "dir1")
1687 Butler.makeRepo(self.dir1, config=Config(self.configFile))
1689 # Move the yaml file to a different place and add a "root"
1690 self.dir2 = os.path.join(self.root, "dir2")
1691 os.makedirs(self.dir2, exist_ok=True)
1692 configFile1 = os.path.join(self.dir1, "butler.yaml")
1693 config = Config(configFile1)
1694 config["root"] = self.dir1
1695 configFile2 = os.path.join(self.dir2, "butler2.yaml")
1696 config.dumpToUri(configFile2)
1697 os.remove(configFile1)
1698 self.tmpConfigFile = configFile2
1700 def testFileLocations(self):
1701 self.assertNotEqual(self.dir1, self.dir2)
1702 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
1703 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
1704 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
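    # Sketch of the relocation performed in setUp(): a config written outside
    # the repo can point back at the data via its "root" key. The directory
    # names below are hypothetical.
    def _sketchExplicitRootConfig(self):
        config = Config("/path/to/dir1/butler.yaml")
        config["root"] = "/path/to/dir1"
        config.dumpToUri("/path/to/dir2/butler2.yaml")
        return Butler("/path/to/dir2/butler2.yaml", writeable=False)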
1707class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
1708 """Test that a config file created by makeRepo outside of repo works."""
1710 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1712 def setUp(self):
1713 self.root = makeTestTempDir(TESTDIR)
1714 self.root2 = makeTestTempDir(TESTDIR)
1716 self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
1717 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1719 def tearDown(self):
1720 if os.path.exists(self.root2):
1721 shutil.rmtree(self.root2, ignore_errors=True)
1722 super().tearDown()
1724 def testConfigExistence(self):
1725 c = Config(self.tmpConfigFile)
1726 uri_config = ResourcePath(c["root"])
1727 uri_expected = ResourcePath(self.root, forceDirectory=True)
1728 self.assertEqual(uri_config.geturl(), uri_expected.geturl())
1729 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")
1731 def testPutGet(self):
1732 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1733 self.runPutGetTest(storageClass, "test_metric")
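    # Sketch of the pattern under test: makeRepo can write the repo's config
    # somewhere other than the repo root via "outfile", and that file can
    # then be used to construct a Butler. Paths are hypothetical.
    def _sketchMakeRepoWithOutfile(self):
        Butler.makeRepo("/path/to/repo", outfile="/path/to/elsewhere/different.yaml")
        return Butler("/path/to/elsewhere/different.yaml", writeable=False)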
1736class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
1737 """Test that a config file created by makeRepo outside of repo works."""
1739 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1741 def setUp(self):
1742 self.root = makeTestTempDir(TESTDIR)
1743 self.root2 = makeTestTempDir(TESTDIR)
1745 self.tmpConfigFile = self.root2
1746 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1748 def testConfigExistence(self):
1749        # Append the yaml file name, otherwise the Config constructor does
1750        # not know the file type.
1751 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
1752 super().testConfigExistence()
1755class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
1756 """Test that a config file created by makeRepo outside of repo works."""
1758 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1760 def setUp(self):
1761 self.root = makeTestTempDir(TESTDIR)
1762 self.root2 = makeTestTempDir(TESTDIR)
1764 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl()
1765 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1768@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
1769class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1770 """S3Datastore specialization of a butler; an S3 storage Datastore +
1771 a local in-memory SqlRegistry.
1772 """
1774 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
1775 fullConfigKey = None
1776 validationCanFail = True
1778 bucketName = "anybucketname"
1779 """Name of the Bucket that will be used in the tests. The name is read from
1780 the config file used with the tests during set-up.
1781 """
1783 root = "butlerRoot/"
1784 """Root repository directory expected to be used in case useTempRoot=False.
1785    Otherwise the root is set to a randomly generated 20-character string
1786 during set-up.
1787 """
1789 datastoreStr = [f"datastore={root}"]
1790 """Contains all expected root locations in a format expected to be
1791 returned by Butler stringification.
1792 """
1794 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
1795 """The expected format of the S3 Datastore string."""
1797 registryStr = "/gen3.sqlite3"
1798 """Expected format of the Registry string."""
1800 mock_s3 = mock_s3()
1801 """The mocked s3 interface from moto."""
1803 def genRoot(self):
1804        """Returns a random string of length 20 to serve as a root
1805 name for the temporary bucket repo.
1807 This is equivalent to tempfile.mkdtemp as this is what self.root
1808 becomes when useTempRoot is True.
1809 """
1810 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20))
1811 return rndstr + "/"
1813 def setUp(self):
1814 config = Config(self.configFile)
1815 uri = ResourcePath(config[".datastore.datastore.root"])
1816 self.bucketName = uri.netloc
1818 # Enable S3 mocking of tests.
1819 self.mock_s3.start()
1821 # set up some fake credentials if they do not exist
1822 self.usingDummyCredentials = setAwsEnvCredentials()
1824 if self.useTempRoot:
1825 self.root = self.genRoot()
1826 rooturi = f"s3://{self.bucketName}/{self.root}"
1827 config.update({"datastore": {"datastore": {"root": rooturi}}})
1829        # Need a local folder to store the registry database.
1830 self.reg_dir = makeTestTempDir(TESTDIR)
1831 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
1833        # Moto needs to know that we expect the bucket named by bucketName
1834        # to exist (this used to be the class attribute bucketName).
1835 s3 = boto3.resource("s3")
1836 s3.create_bucket(Bucket=self.bucketName)
1838 self.datastoreStr = f"datastore={self.root}"
1839 self.datastoreName = [f"FileDatastore@{rooturi}"]
1840 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
1841 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")
1843 def tearDown(self):
1844 s3 = boto3.resource("s3")
1845 bucket = s3.Bucket(self.bucketName)
1846 try:
1847 bucket.objects.all().delete()
1848 except botocore.exceptions.ClientError as e:
1849 if e.response["Error"]["Code"] == "404":
1850 # the key was not reachable - pass
1851 pass
1852 else:
1853 raise
1855 bucket = s3.Bucket(self.bucketName)
1856 bucket.delete()
1858 # Stop the S3 mock.
1859 self.mock_s3.stop()
1861 # unset any potentially set dummy credentials
1862 if self.usingDummyCredentials:
1863 unsetAwsEnvCredentials()
1865 if self.reg_dir is not None and os.path.exists(self.reg_dir):
1866 shutil.rmtree(self.reg_dir, ignore_errors=True)
1868 if self.useTempRoot and os.path.exists(self.root):
1869 shutil.rmtree(self.root, ignore_errors=True)
1871 super().tearDown()
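    # Sketch of the moto setup used above, condensed into one place: start
    # the S3 mock, create the bucket the datastore root points at, and build
    # the repo against the s3:// URI. The bucket name, root and local
    # registry path are hypothetical placeholders.
    def _sketchMockedS3Repo(self, reg_dir):
        mock = mock_s3()
        mock.start()
        setAwsEnvCredentials()
        boto3.resource("s3").create_bucket(Bucket="examplebucket")
        config = Config(self.configFile)
        config.update({"datastore": {"datastore": {"root": "s3://examplebucket/butlerRoot/"}}})
        config["registry", "db"] = f"sqlite:///{reg_dir}/gen3.sqlite3"
        Butler.makeRepo("s3://examplebucket/butlerRoot/", config=config, forceConfigRoot=False)
        mock.stop()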
1874@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
1875class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1876 """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
1877 a local in-memory SqlRegistry.
1878 """
1880 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
1881 fullConfigKey = None
1882 validationCanFail = True
1884 serverName = "localhost"
1885 """Name of the server that will be used in the tests.
1886 """
1888 portNumber = 8080
1889 """Port on which the webdav server listens. Automatically chosen
1890    at setUpClass via the _getfreeport() method.
1891 """
1893 root = "butlerRoot/"
1894 """Root repository directory expected to be used in case useTempRoot=False.
1895    Otherwise the root is set to a randomly generated 20-character string
1896 during set-up.
1897 """
1899 datastoreStr = [f"datastore={root}"]
1900 """Contains all expected root locations in a format expected to be
1901 returned by Butler stringification.
1902 """
1904 datastoreName = ["FileDatastore@https://{serverName}/{root}"]
1905 """The expected format of the WebdavDatastore string."""
1907 registryStr = "/gen3.sqlite3"
1908 """Expected format of the Registry string."""
1910 serverThread = None
1911 """Thread in which the local webdav server will run"""
1913 stopWebdavServer = False
1914 """This flag will cause the webdav server to
1915 gracefully shut down when True
1916 """
1918 def genRoot(self):
1919        """Returns a random string of length 20 to serve as a root
1920        name for the temporary repo.
1922 This is equivalent to tempfile.mkdtemp as this is what self.root
1923 becomes when useTempRoot is True.
1924 """
1925 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20))
1926 return rndstr + "/"
1928 @classmethod
1929 def setUpClass(cls):
1930 # Do the same as inherited class
1931 cls.storageClassFactory = StorageClassFactory()
1932 cls.storageClassFactory.addFromConfig(cls.configFile)
1934 cls.portNumber = cls._getfreeport()
1935 # Run a local webdav server on which tests will be run
1936 cls.serverThread = Thread(
1937 target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True
1938 )
1939 cls.serverThread.start()
1940 # Wait for it to start
1941 time.sleep(3)
1943 @classmethod
1944 def tearDownClass(cls):
1945 # Ask for graceful shut down of the webdav server
1946 cls.stopWebdavServer = True
1947 # Wait for the thread to exit
1948 cls.serverThread.join()
1949 super().tearDownClass()
1951 def setUp(self):
1952 config = Config(self.configFile)
1954 if self.useTempRoot:
1955 self.root = self.genRoot()
1956 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
1957 config.update({"datastore": {"datastore": {"root": self.rooturi}}})
1959        # Need a local folder to store the registry database.
1960 self.reg_dir = makeTestTempDir(TESTDIR)
1961 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
1963 self.datastoreStr = f"datastore={self.root}"
1964 self.datastoreName = [f"FileDatastore@{self.rooturi}"]
1966 if not _is_webdav_endpoint(self.rooturi):
1967 raise OSError("Webdav server not running properly: cannot run tests.")
1969 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
1970 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")
1972 def tearDown(self):
1973 # Clear temporary directory
1974 ResourcePath(self.rooturi).remove()
1975 ResourcePath(self.rooturi).session.close()
1977 if self.reg_dir is not None and os.path.exists(self.reg_dir):
1978 shutil.rmtree(self.reg_dir, ignore_errors=True)
1980 if self.useTempRoot and os.path.exists(self.root):
1981 shutil.rmtree(self.root, ignore_errors=True)
1983 super().tearDown()
1985 def _serveWebdav(self, port: int, stopWebdavServer):
1986        """Starts a local webdav-compatible HTTP server,
1987        listening on http://localhost:port.
1988        This server only runs while this test class is instantiated,
1989        and then shuts down. Must be started in a separate thread.
1991 Parameters
1992 ----------
1993 port : `int`
1994            The port number on which the server should listen.
1995 """
1996 root_path = gettempdir()
1998 config = {
1999 "host": "0.0.0.0",
2000 "port": port,
2001 "provider_mapping": {"/": root_path},
2002 "http_authenticator": {"domain_controller": None},
2003 "simple_dc": {"user_mapping": {"*": True}},
2004 "verbose": 0,
2005 }
2006 app = WsgiDAVApp(config)
2008 server_args = {
2009 "bind_addr": (config["host"], config["port"]),
2010 "wsgi_app": app,
2011 }
2012 server = wsgi.Server(**server_args)
2013 server.prepare()
2015 try:
2016 # Start the actual server in a separate thread
2017 t = Thread(target=server.serve, daemon=True)
2018 t.start()
2019 # watch stopWebdavServer, and gracefully
2020 # shut down the server when True
2021 while True:
2022 if stopWebdavServer():
2023 break
2024 time.sleep(1)
2025 except KeyboardInterrupt:
2026 print("Caught Ctrl-C, shutting down...")
2027 finally:
2028 server.stop()
2029 t.join()
2031 def _getfreeport():
2032 """
2033 Determines a free port using sockets.
2034 """
2035 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
2036 free_socket.bind(("127.0.0.1", 0))
2037 free_socket.listen()
2038 port = free_socket.getsockname()[1]
2039 free_socket.close()
2040 return port
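    # Sketch (illustrative only) of how the helpers above fit together: pick
    # a free port, run the WebDAV server in a daemon thread, and verify the
    # endpoint responds before building a repo on it.
    def _sketchStartLocalServer(self):
        port = type(self)._getfreeport()
        thread = Thread(target=self._serveWebdav, args=(port, lambda: self.stopWebdavServer), daemon=True)
        thread.start()
        time.sleep(3)
        return _is_webdav_endpoint(f"http://localhost:{port}/")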
2043class PosixDatastoreTransfers(unittest.TestCase):
2044 """Test data transfers between butlers.
2046    Transfers are tested for different dataset ID managers. UUID to UUID and
2047    integer to integer are tested. UUID to integer is not supported since we
2048    do not currently want to allow that. Integer to UUID is supported with
2049    the caveat that UUID4 values will be generated, which would be incorrect
2050    for raw dataset types; the test ignores that.
2051 """
2053 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
2055 @classmethod
2056 def setUpClass(cls):
2057 cls.storageClassFactory = StorageClassFactory()
2058 cls.storageClassFactory.addFromConfig(cls.configFile)
2060 def setUp(self):
2061 self.root = makeTestTempDir(TESTDIR)
2062 self.config = Config(self.configFile)
2064 def tearDown(self):
2065 removeTestTempDir(self.root)
2067 def create_butler(self, manager, label):
2068 config = Config(self.configFile)
2069 config["registry", "managers", "datasets"] = manager
2070 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True)
2072 def create_butlers(self, manager1, manager2):
2073 self.source_butler = self.create_butler(manager1, "1")
2074 self.target_butler = self.create_butler(manager2, "2")
2076 def testTransferUuidToUuid(self):
2077 self.create_butlers(
2078 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
2079 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
2080 )
2081 # Setting id_gen_map should have no effect here
2082 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})
2084 def testTransferIntToInt(self):
2085 with self.assertWarns(FutureWarning):
2086 self.create_butlers(
2087 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager",
2088 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager",
2089 )
2090 # int dataset ID only allows UNIQUE
2091 self.assertButlerTransfers()
2093 def testTransferIntToUuid(self):
2094 with self.assertWarns(FutureWarning):
2095 self.create_butlers(
2096 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager",
2097 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
2098 )
2099 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})
2101 def testTransferMissing(self):
2102 """Test transfers where datastore records are missing.
2104 This is how execution butler works.
2105 """
2106 self.create_butlers(
2107 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
2108 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
2109 )
2111 # Configure the source butler to allow trust.
2112 self.source_butler.datastore.trustGetRequest = True
2114 self.assertButlerTransfers(purge=True)
2116 def testTransferMissingDisassembly(self):
2117 """Test transfers where datastore records are missing.
2119 This is how execution butler works.
2120 """
2121 self.create_butlers(
2122 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
2123 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
2124 )
2126 # Configure the source butler to allow trust.
2127 self.source_butler.datastore.trustGetRequest = True
2129 # Test disassembly.
2130 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite")
2132 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"):
2133 """Test that a run can be transferred to another butler."""
2135 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
2136 datasetTypeName = "random_data"
2138 # Test will create 3 collections and we will want to transfer
2139 # two of those three.
2140 runs = ["run1", "run2", "other"]
2142 # Also want to use two different dataset types to ensure that
2143 # grouping works.
2144 datasetTypeNames = ["random_data", "random_data_2"]
2146 # Create the run collections in the source butler.
2147 for run in runs:
2148 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
2150 # Create dimensions in source butler.
2151 n_exposures = 30
2152 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
2153 self.source_butler.registry.insertDimensionData(
2154 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
2155 )
2156 self.source_butler.registry.insertDimensionData(
2157 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
2158 )
2160 for i in range(n_exposures):
2161 self.source_butler.registry.insertDimensionData(
2162 "exposure",
2163 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"},
2164 )
2166 # Create dataset types in the source butler.
2167 dimensions = self.source_butler.registry.dimensions.extract(["instrument", "exposure"])
2168 for datasetTypeName in datasetTypeNames:
2169 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2170 self.source_butler.registry.registerDatasetType(datasetType)
2172 # Write a dataset to an unrelated run -- this will ensure that
2173 # we are rewriting integer dataset ids in the target if necessary.
2174 # Will not be relevant for UUID.
2175 run = "distraction"
2176 butler = Butler(butler=self.source_butler, run=run)
2177 butler.put(
2178 makeExampleMetrics(),
2179 datasetTypeName,
2180 exposure=1,
2181 instrument="DummyCamComp",
2182 physical_filter="d-r",
2183 )
2185 # Write some example metrics to the source
2186 butler = Butler(butler=self.source_butler)
2188 # Set of DatasetRefs that should be in the list of refs to transfer
2189 # but which will not be transferred.
2190 deleted = set()
2192 n_expected = 20 # Number of datasets expected to be transferred
2193 source_refs = []
2194 for i in range(n_exposures):
2195            # Put a third of the datasets into each collection; only retain
2196            # two thirds of them.
2197 index = i % 3
2198 run = runs[index]
2199 datasetTypeName = datasetTypeNames[i % 2]
2201 metric_data = {
2202 "summary": {"counter": i},
2203 "output": {"text": "metric"},
2204 "data": [2 * x for x in range(i)],
2205 }
2206 metric = MetricsExample(**metric_data)
2207 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"}
2208 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)
2210 # Remove the datastore record using low-level API
2211 if purge:
2212 # Remove records for a fraction.
2213 if index == 1:
2215 # For one of these delete the file as well.
2216 # This allows the "missing" code to filter the
2217 # file out.
2218 if not deleted:
2219 primary, uris = butler.datastore.getURIs(ref)
2220 if primary:
2221 primary.remove()
2222 for uri in uris.values():
2223 uri.remove()
2224 n_expected -= 1
2225 deleted.add(ref)
2227 # Remove the datastore record.
2228 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
2230 if index < 2:
2231 source_refs.append(ref)
2232 if ref not in deleted:
2233 new_metric = butler.get(ref.unresolved(), collections=run)
2234 self.assertEqual(new_metric, metric)
2236 # Create some bad dataset types to ensure we check for inconsistent
2237 # definitions.
2238 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList")
2239 for datasetTypeName in datasetTypeNames:
2240 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass)
2241 self.target_butler.registry.registerDatasetType(datasetType)
2242 with self.assertRaises(ConflictingDefinitionError) as cm:
2243 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map)
2244 self.assertIn("dataset type differs", str(cm.exception))
2246 # And remove the bad definitions.
2247 for datasetTypeName in datasetTypeNames:
2248 self.target_butler.registry.removeDatasetType(datasetTypeName)
2250 # Transfer without creating dataset types should fail.
2251 with self.assertRaises(KeyError):
2252 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map)
2254 # Transfer without creating dimensions should fail.
2255 with self.assertRaises(ConflictingDefinitionError) as cm:
2256 self.target_butler.transfer_from(
2257 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True
2258 )
2259 self.assertIn("dimension", str(cm.exception))
2261 # The failed transfer above leaves registry in an inconsistent
2262 # state because the run is created but then rolled back without
2263 # the collection cache being cleared. For now force a refresh.
2264 # Can remove with DM-35498.
2265 self.target_butler.registry.refresh()
2267 # Now transfer them to the second butler, including dimensions.
2268 with self.assertLogs(level=logging.DEBUG) as cm:
2269 transferred = self.target_butler.transfer_from(
2270 self.source_butler,
2271 source_refs,
2272 id_gen_map=id_gen_map,
2273 register_dataset_types=True,
2274 transfer_dimensions=True,
2275 )
2276 self.assertEqual(len(transferred), n_expected)
2277 log_output = ";".join(cm.output)
2278 self.assertIn("found in datastore for chunk", log_output)
2279 self.assertIn("Creating output run", log_output)
2281 # Do the transfer twice to ensure that it will do nothing extra.
2282 # Only do this if purge=True because it does not work for int
2283 # dataset_id.
2284 if purge:
2285 # This should not need to register dataset types.
2286 transferred = self.target_butler.transfer_from(
2287 self.source_butler, source_refs, id_gen_map=id_gen_map
2288 )
2289 self.assertEqual(len(transferred), n_expected)
2291 # Also do an explicit low-level transfer to trigger some
2292 # edge cases.
2293 with self.assertLogs(level=logging.DEBUG) as cm:
2294 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs)
2295 log_output = ";".join(cm.output)
2296 self.assertIn("no file artifacts exist", log_output)
2298 with self.assertRaises(TypeError):
2299 self.target_butler.datastore.transfer_from(self.source_butler, source_refs)
2301 with self.assertRaises(ValueError):
2302 self.target_butler.datastore.transfer_from(
2303 self.source_butler.datastore, source_refs, transfer="split"
2304 )
2306 # Now try to get the same refs from the new butler.
2307 for ref in source_refs:
2308 if ref not in deleted:
2309 unresolved_ref = ref.unresolved()
2310 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run)
2311 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run)
2312 self.assertEqual(new_metric, old_metric)
2314        # Now prune the run2 collection and instead register a CHAINED
2315        # collection with the same name. This should block the transfer.
2316 self.target_butler.pruneCollection("run2", purge=True, unstore=True)
2317 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED)
2318 with self.assertRaises(CollectionTypeError):
2319 # Re-importing the run1 datasets can be problematic if they
2320 # use integer IDs so filter those out.
2321 to_transfer = [ref for ref in source_refs if ref.run == "run2"]
2322 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map)
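    # Sketch of the high-level call exercised throughout this test: transfer
    # a set of refs from one butler to another, registering dataset types
    # and copying dimension records into the target as needed.
    def _sketchTransferFrom(self, source_butler, target_butler, refs):
        return target_butler.transfer_from(
            source_butler,
            refs,
            transfer="auto",
            register_dataset_types=True,
            transfer_dimensions=True,
        )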
2325if __name__ == "__main__":
2326 unittest.main()