# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import gc
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported."""
        return cls
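
# With this fallback in place, a test class can carry the ``@mock_s3``
# decorator unconditionally: when moto is importable the class gets real S3
# mocking, and when it is not, the decorator returns the class unchanged
# (those S3-backed tests are then expected to be skipped via the
# ``boto3 = None`` sentinel above).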

try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )
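
# Note: the three positional arguments above populate the ``summary``,
# ``output`` and ``data`` attributes of MetricsExample; the component and
# slicing tests below compare retrieved values against those attributes
# directly.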


class TransactionTestError(Exception):
    """Specific error for testing transactions, to avoid the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in any other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests from different
    butler configurations."""

    root = None
    default_run = "ingésτ😺"

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it"""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # when path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
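
        # All three spellings above must agree: the "slice" parameter is
        # applied when the dataset is read back, so supplying it to get(),
        # to getDeferred(), or to the deferred handle's own get() is expected
        # to produce the same truncated ``data`` list.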

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present
        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())
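
    # For reference, the repository index exercised above is just a mapping
    # from alias to butler config URI; in its YAML form it would look
    # something like:
    #
    #     label: /path/to/repo/butler.yaml
    #     bad_label: s3://bucket/not_real.yaml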

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self):
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.getDirect(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDirectDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDirectDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self):
        """Test python type coercion on Butler.get and put."""

        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.getDirect(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.getDirect(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.getDirect(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the wrong dataset type definition using get()
        # rather than getDirect(). This should be consistent with getDirect()
        # behavior and return the type of the DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            butler.ingest(*datasets, transfer="move", record_validation_info=False)
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")
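
        # Both data IDs were ingested from one FileDataset pointing at a
        # single file, so they are expected to share the same artifact URI,
        # unlike the per-detector files ingested for visit 423 above.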

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying can still return the
        # component dataset types.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)
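
        # The assertions above rely on butler.transaction() rolling back the
        # registry inserts (dimension records and dataset entries) and the
        # datastore write together, so nothing put inside the failed
        # transaction remains visible afterwards.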

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" lacks the key that we know "full" inherits
        # from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)
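
        # The put() above never specified "exposure" directly: the registry
        # expands the (seq_num, day_obs) values against the exposure records
        # inserted earlier and rewrites the dataId to the matching exposure,
        # which is what the assertions check.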


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)
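
        # The ":?" template suffix marks the field as optional, so the typo
        # above is dropped with only a log message; the same typo in a
        # mandatory field raises KeyError instead, as just exercised.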

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())
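
        # To summarize the two modes: unstore=True removed the run1 artifact
        # itself, while unstore=False merely forgot the run2 datastore record
        # and left its file in place.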


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testPathConstructor(self):
        """Independent test of constructor using PathLike."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object with the butler yaml
        path = pathlib.Path(self.tmpConfigFile)
        butler = Butler(path, writeable=False)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object without the butler yaml
        # (making sure we skip it if the tmp config doesn't end
        # in butler.yaml -- which is the case for a subclass)
        if self.tmpConfigFile.endswith("butler.yaml"):
            path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
            butler = Butler(path, writeable=False)
            self.assertIsInstance(butler, Butler)

    def testExportTransferCopy(self):
        """Test local export using several transfer modes."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(
                        self.checkFileExists(exportDir, path),
                        f"Check that mode {transfer} exported files",
                    )

    def testPruneDatasets(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Simple prune.
        butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1)

        # Put data back.
        ref1 = butler.put(metric, ref1.unresolved(), run=run1)
        ref2 = butler.put(metric, ref2.unresolved(), run=run2)
        ref3 = butler.put(metric, ref3.unresolved(), run=run1)

        # Check that in normal mode, deleting the record will lead to
        # trash not touching the file.
        uri1 = butler.datastore.getURI(ref1)
        butler.datastore.bridge.moveToTrash([ref1], transaction=None)  # Update the dataset_location table
        butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id})
        butler.datastore.trash(ref1)
        butler.datastore.emptyTrash()
        self.assertTrue(uri1.exists())
        uri1.remove()  # Clean it up.

        # Simulate execution butler setup by deleting the datastore
        # record but keeping the file around and trusting.
        butler.datastore.trustGetRequest = True
        uri2 = butler.datastore.getURI(ref2)
        uri3 = butler.datastore.getURI(ref3)
        self.assertTrue(uri2.exists())
        self.assertTrue(uri3.exists())

        # Remove the datastore record.
        butler.datastore.bridge.moveToTrash([ref2], transaction=None)  # Update the dataset_location table
        butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id})
        self.assertTrue(uri2.exists())
        butler.datastore.trash([ref2, ref3])
        # Immediate removal for ref2 file
        self.assertFalse(uri2.exists())
        # But ref3 has to wait for the empty.
        self.assertTrue(uri3.exists())
        butler.datastore.emptyTrash()
        self.assertFalse(uri3.exists())

        # Clear out the datasets from registry.
        butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
1394 def testPytypeCoercion(self):
1395 """Test python type coercion on Butler.get and put."""
1397 # Store some data with the normal example storage class.
1398 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1399 datasetTypeName = "test_metric"
1400 butler = self.runPutGetTest(storageClass, datasetTypeName)
1402 dataId = {"instrument": "DummyCamComp", "visit": 423}
1403 metric = butler.get(datasetTypeName, dataId=dataId)
1404 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample")
1406 datasetType_ori = butler.registry.getDatasetType(datasetTypeName)
1407 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents")
1409 # Now we need to hack the registry dataset type definition.
1410 # There is no public API for this.
1411 manager = butler.registry._managers.datasets
1412 manager._db.update(
1413 manager._static.dataset_type,
1414 {"name": datasetTypeName},
1415 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"},
1416 )
1418 # Force a reset of the dataset type cache.
1419 butler.registry.refresh()
1421 datasetType_new = butler.registry.getDatasetType(datasetTypeName)
1422 self.assertEqual(datasetType_new.name, datasetType_ori.name)
1423 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel")
1425 metric_model = butler.get(datasetTypeName, dataId=dataId)
1426 self.assertNotEqual(type(metric_model), type(metric))
1427 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel")
1429 # Put the model and read it back to show that everything now
1430 # works as normal.
1431 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424)
1432 metric_model_new = butler.get(metric_ref)
1433 self.assertEqual(metric_model_new, metric_model)
1435 # Hack the storage class again to something that will fail on
1436 # get because no conversion is possible.
1437 manager._db.update(
1438 manager._static.dataset_type,
1439 {"name": datasetTypeName},
1440 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"},
1441 )
1442 butler.registry.refresh()
1444 with self.assertRaises(ValueError):
1445 butler.get(datasetTypeName, dataId=dataId)
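# Summary sketch of the coercion behaviour asserted in this test: on get(),
# the registry's storage class definition determines the Python type, so
# after the definition is rewritten the same stored data materializes as a
# different type, and an impossible conversion raises ValueError:
#
#     metric = butler.get("test_metric", dataId=dataId)
#     # -> MetricsExample before the hack, MetricsExampleModel after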
1448@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
1449class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1450 """PosixDatastore specialization of a butler using Postgres"""
1452 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1453 fullConfigKey = ".datastore.formatters"
1454 validationCanFail = True
1455 datastoreStr = ["/tmp"]
1456 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1457 registryStr = "PostgreSQL@test"
1459 @staticmethod
1460 def _handler(postgresql):
1461 engine = sqlalchemy.engine.create_engine(postgresql.url())
1462 with engine.begin() as connection:
1463 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))
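# The btree_gist extension is created here presumably because the registry
# schema relies on GiST-backed constraints; the handler itself is just the
# standard testing.postgresql "on_initialized" hook, which receives the
# factory's Postgresql instance and can run arbitrary one-time setup SQL.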
1465 @classmethod
1466 def setUpClass(cls):
1467 # Create the postgres test server.
1468 cls.postgresql = testing.postgresql.PostgresqlFactory(
1469 cache_initialized_db=True, on_initialized=cls._handler
1470 )
1471 super().setUpClass()
1473 @classmethod
1474 def tearDownClass(cls):
1475 # Clean up any lingering SQLAlchemy engines/connections
1476 # so they're closed before we shut down the server.
1477 gc.collect()
1478 cls.postgresql.clear_cache()
1479 super().tearDownClass()
1481 def setUp(self):
1482 self.server = self.postgresql()
1484 # Need to add a registry section to the config.
1485 self._temp_config = False
1486 config = Config(self.configFile)
1487 config["registry", "db"] = self.server.url()
1488 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh:
1489 config.dump(fh)
1490 self.configFile = fh.name
1491 self._temp_config = True
1492 super().setUp()
1494 def tearDown(self):
1495 self.server.stop()
1496 if self._temp_config and os.path.exists(self.configFile):
1497 os.remove(self.configFile)
1498 super().tearDown()
1500 def testMakeRepo(self):
1501 # The base class test assumes that it's using sqlite and assumes
1502 # the config file is acceptable to sqlite.
1503 raise unittest.SkipTest("Postgres config is not compatible with this test.")
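# Outline of the testing.postgresql lifecycle used by this test case (all
# names from that library's API as used above):
#
#     factory = testing.postgresql.PostgresqlFactory(cache_initialized_db=True)
#     server = factory()        # start a throwaway PostgreSQL server
#     url = server.url()        # SQLAlchemy-compatible connection URL
#     server.stop()             # per-test teardown
#     factory.clear_cache()     # class-level teardown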
1506class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
1507 """InMemoryDatastore specialization of a butler"""
1509 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
1510 fullConfigKey = None
1511 useTempRoot = False
1512 validationCanFail = False
1513 datastoreStr = ["datastore='InMemory"]
1514 datastoreName = ["InMemoryDatastore@"]
1515 registryStr = "/gen3.sqlite3"
1517 def testIngest(self):
1518 pass
1521class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1522 """PosixDatastore specialization"""
1524 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
1525 fullConfigKey = ".datastore.datastores.1.formatters"
1526 validationCanFail = True
1527 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
1528 datastoreName = [
1529 "InMemoryDatastore@",
1530 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
1531 "SecondDatastore",
1532 ]
1533 registryStr = "/gen3.sqlite3"
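# A note on the chained configuration above (inferred from the class
# attributes, not from the config file itself): a ChainedDatastore wraps an
# ordered list of child datastores, so per-child settings are addressed by
# index -- ".datastore.datastores.1.formatters" is the formatters section
# of the second child, the first FileDatastore.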
1536class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
1537 """Test that a yaml file in one location can refer to a root in another."""
1539 datastoreStr = ["dir1"]
1540 # Disable the makeRepo test since we are deliberately not using
1541 # butler.yaml as the config name.
1542 fullConfigKey = None
1544 def setUp(self):
1545 self.root = makeTestTempDir(TESTDIR)
1547 # Make a new repository in one place
1548 self.dir1 = os.path.join(self.root, "dir1")
1549 Butler.makeRepo(self.dir1, config=Config(self.configFile))
1551 # Move the yaml file to a different place and add a "root"
1552 self.dir2 = os.path.join(self.root, "dir2")
1553 os.makedirs(self.dir2, exist_ok=True)
1554 configFile1 = os.path.join(self.dir1, "butler.yaml")
1555 config = Config(configFile1)
1556 config["root"] = self.dir1
1557 configFile2 = os.path.join(self.dir2, "butler2.yaml")
1558 config.dumpToUri(configFile2)
1559 os.remove(configFile1)
1560 self.tmpConfigFile = configFile2
1562 def testFileLocations(self):
1563 self.assertNotEqual(self.dir1, self.dir2)
1564 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
1565 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
1566 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
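# Minimal sketch of the relocation pattern this class tests (values as
# created in setUp): the config lives in dir2 but its "root" key points
# back at dir1, so the registry and datastore stay under dir1:
#
#     config = Config(os.path.join(dir1, "butler.yaml"))
#     config["root"] = dir1
#     config.dumpToUri(os.path.join(dir2, "butler2.yaml"))
#     butler = Butler(os.path.join(dir2, "butler2.yaml"))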
1569class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
1570 """Test that a config file created by makeRepo outside of repo works."""
1572 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1574 def setUp(self):
1575 self.root = makeTestTempDir(TESTDIR)
1576 self.root2 = makeTestTempDir(TESTDIR)
1578 self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
1579 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1581 def tearDown(self):
1582 if os.path.exists(self.root2):
1583 shutil.rmtree(self.root2, ignore_errors=True)
1584 super().tearDown()
1586 def testConfigExistence(self):
1587 c = Config(self.tmpConfigFile)
1588 uri_config = ResourcePath(c["root"])
1589 uri_expected = ResourcePath(self.root, forceDirectory=True)
1590 self.assertEqual(uri_config.geturl(), uri_expected.geturl())
1591 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")
1593 def testPutGet(self):
1594 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1595 self.runPutGetTest(storageClass, "test_metric")
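# The outfile pattern shared by this class and its subclasses below, as a
# sketch (the repo is created in one place while its config is written to
# another; "repo_root" and "elsewhere" are placeholder paths):
#
#     Butler.makeRepo(repo_root, config=Config(configFile),
#                     outfile=os.path.join(elsewhere, "different.yaml"))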
1598class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
1599 """Test that a config file created by makeRepo outside of repo works."""
1601 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1603 def setUp(self):
1604 self.root = makeTestTempDir(TESTDIR)
1605 self.root2 = makeTestTempDir(TESTDIR)
1607 self.tmpConfigFile = self.root2
1608 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1610 def testConfigExistence(self):
1611 # Append the yaml file name, else the Config constructor does not
1612 # know the file type.
1613 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
1614 super().testConfigExistence()
1617class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
1618 """Test that a config file created by makeRepo outside of repo works."""
1620 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1622 def setUp(self):
1623 self.root = makeTestTempDir(TESTDIR)
1624 self.root2 = makeTestTempDir(TESTDIR)
1626 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl()
1627 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1630@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
1631class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1632 """S3Datastore specialization of a butler; an S3 storage Datastore +
1633 a local in-memory SqlRegistry.
1634 """
1636 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
1637 fullConfigKey = None
1638 validationCanFail = True
1640 bucketName = "anybucketname"
1641 """Name of the Bucket that will be used in the tests. The name is read from
1642 the config file used with the tests during set-up.
1643 """
1645 root = "butlerRoot/"
1646 """Root repository directory expected to be used in case useTempRoot=False.
1647 Otherwise the root is set to a 20 characters long randomly generated string
1648 during set-up.
1649 """
1651 datastoreStr = [f"datastore={root}"]
1652 """Contains all expected root locations in a format expected to be
1653 returned by Butler stringification.
1654 """
1656 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
1657 """The expected format of the S3 Datastore string."""
1659 registryStr = "/gen3.sqlite3"
1660 """Expected format of the Registry string."""
1662 mock_s3 = mock_s3()
1663 """The mocked s3 interface from moto."""
1665 def genRoot(self):
1666 """Returns a random string of len 20 to serve as a root
1667 name for the temporary bucket repo.
1669 This is equivalent to tempfile.mkdtemp as this is what self.root
1670 becomes when useTempRoot is True.
1671 """
1672 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20))
1673 return rndstr + "/"
1675 def setUp(self):
1676 config = Config(self.configFile)
1677 uri = ResourcePath(config[".datastore.datastore.root"])
1678 self.bucketName = uri.netloc
1680 # Enable S3 mocking of tests.
1681 self.mock_s3.start()
1683 # Set up some fake credentials if they do not already exist.
1684 self.usingDummyCredentials = setAwsEnvCredentials()
1686 if self.useTempRoot:
1687 self.root = self.genRoot()
1688 rooturi = f"s3://{self.bucketName}/{self.root}"
1689 config.update({"datastore": {"datastore": {"root": rooturi}}})
1691 # Need a local folder to store the registry database.
1692 self.reg_dir = makeTestTempDir(TESTDIR)
1693 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
1695 # Moto needs to know that we expect the bucket to exist
1696 # (the name used to be the class attribute bucketName).
1697 s3 = boto3.resource("s3")
1698 s3.create_bucket(Bucket=self.bucketName)
1700 self.datastoreStr = f"datastore={self.root}"
1701 self.datastoreName = [f"FileDatastore@{rooturi}"]
1702 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
1703 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")
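# The moto pattern used by setUp/tearDown, in outline (standard moto
# usage; the bucket name is read from the test config):
#
#     mock = mock_s3()
#     mock.start()                                   # intercept boto3 calls
#     boto3.resource("s3").create_bucket(Bucket=bucketName)
#     ...                                            # exercise S3 code paths
#     mock.stop()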
1705 def tearDown(self):
1706 s3 = boto3.resource("s3")
1707 bucket = s3.Bucket(self.bucketName)
1708 try:
1709 bucket.objects.all().delete()
1710 except botocore.exceptions.ClientError as e:
1711 if e.response["Error"]["Code"] == "404":
1712 # The key was not reachable, so there is nothing to delete.
1713 pass
1714 else:
1715 raise
1717 bucket = s3.Bucket(self.bucketName)
1718 bucket.delete()
1720 # Stop the S3 mock.
1721 self.mock_s3.stop()
1723 # Unset any dummy credentials that we may have set.
1724 if self.usingDummyCredentials:
1725 unsetAwsEnvCredentials()
1727 if self.reg_dir is not None and os.path.exists(self.reg_dir):
1728 shutil.rmtree(self.reg_dir, ignore_errors=True)
1730 if self.useTempRoot and os.path.exists(self.root):
1731 shutil.rmtree(self.root, ignore_errors=True)
1733 super().tearDown()
1736class PosixDatastoreTransfers(unittest.TestCase):
1737 """Test data transfers between butlers.
1739 Tests cover different dataset ID managers. UUID-to-UUID and
1740 integer-to-integer transfers are tested. UUID-to-integer is not
1741 supported since we do not currently want to allow it. Integer-to-UUID
1742 is supported, with the caveat that UUID4 IDs will be generated, which
1743 would be incorrect for raw dataset types; the tests ignore that.
1744 """
1746 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1748 @classmethod
1749 def setUpClass(cls):
1750 cls.storageClassFactory = StorageClassFactory()
1751 cls.storageClassFactory.addFromConfig(cls.configFile)
1753 def setUp(self):
1754 self.root = makeTestTempDir(TESTDIR)
1755 self.config = Config(self.configFile)
1757 def tearDown(self):
1758 removeTestTempDir(self.root)
1760 def create_butler(self, manager, label):
1761 config = Config(self.configFile)
1762 config["registry", "managers", "datasets"] = manager
1763 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True)
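# Sketch of how the dataset ID flavour is selected (the only knob these
# tests turn is this single config key; the class path is one used below):
#
#     config["registry", "managers", "datasets"] = (
#         "lsst.daf.butler.registry.datasets.byDimensions"
#         ".ByDimensionsDatasetRecordStorageManagerUUID"
#     )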
1765 def create_butlers(self, manager1, manager2):
1766 self.source_butler = self.create_butler(manager1, "1")
1767 self.target_butler = self.create_butler(manager2, "2")
1769 def testTransferUuidToUuid(self):
1770 self.create_butlers(
1771 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
1772 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
1773 )
1774 # Setting id_gen_map should have no effect here
1775 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})
1777 def testTransferMissing(self):
1778 """Test transfers where datastore records are missing.
1780 This is how execution butler works.
1781 """
1782 self.create_butlers(
1783 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
1784 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
1785 )
1787 # Configure the source butler to allow trust.
1788 self.source_butler.datastore.trustGetRequest = True
1790 self.assertButlerTransfers(purge=True)
1792 def testTransferMissingDisassembly(self):
1793 """Test transfers where datastore records are missing.
1795 This is how execution butler works.
1796 """
1797 self.create_butlers(
1798 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
1799 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
1800 )
1802 # Configure the source butler to allow trust.
1803 self.source_butler.datastore.trustGetRequest = True
1805 # Test disassembly.
1806 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite")
1808 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"):
1809 """Test that a run can be transferred to another butler."""
1811 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
1812 datasetTypeName = "random_data"
1814 # The test will create 3 collections, and we will want to transfer
1815 # two of those three.
1816 runs = ["run1", "run2", "other"]
1818 # We also want to use two different dataset types to ensure that
1819 # grouping works.
1820 datasetTypeNames = ["random_data", "random_data_2"]
1822 # Create the run collections in the source butler.
1823 for run in runs:
1824 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
1826 # Create dimensions in source butler.
1827 n_exposures = 30
1828 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
1829 self.source_butler.registry.insertDimensionData(
1830 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
1831 )
1832 self.source_butler.registry.insertDimensionData(
1833 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
1834 )
1836 for i in range(n_exposures):
1837 self.source_butler.registry.insertDimensionData(
1838 "exposure",
1839 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"},
1840 )
1842 # Create dataset types in the source butler.
1843 dimensions = self.source_butler.registry.dimensions.extract(["instrument", "exposure"])
1844 for datasetTypeName in datasetTypeNames:
1845 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
1846 self.source_butler.registry.registerDatasetType(datasetType)
1848 # Write a dataset to an unrelated run -- this will ensure that
1849 # we are rewriting integer dataset IDs in the target if necessary.
1850 # This is not relevant for UUIDs.
1851 run = "distraction"
1852 butler = Butler(butler=self.source_butler, run=run)
1853 butler.put(
1854 makeExampleMetrics(),
1855 datasetTypeName,
1856 exposure=1,
1857 instrument="DummyCamComp",
1858 physical_filter="d-r",
1859 )
1861 # Write some example metrics to the source
1862 butler = Butler(butler=self.source_butler)
1864 # Set of DatasetRefs that should be in the list of refs to transfer
1865 # but which will not be transferred.
1866 deleted = set()
1868 n_expected = 20 # Number of datasets expected to be transferred
1869 source_refs = []
1870 for i in range(n_exposures):
1871 # Put a third of the datasets into each collection; only retain
1872 # two thirds.
1873 index = i % 3
1874 run = runs[index]
1875 datasetTypeName = datasetTypeNames[i % 2]
1877 metric_data = {
1878 "summary": {"counter": i},
1879 "output": {"text": "metric"},
1880 "data": [2 * x for x in range(i)],
1881 }
1882 metric = MetricsExample(**metric_data)
1883 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"}
1884 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)
1886 # Remove the datastore record using low-level API
1887 if purge:
1888 # Remove records for a fraction.
1889 if index == 1:
1890 # For one of these, delete the file as well.
1891 # This allows the "missing" code to filter the
1892 # file out.
1893 if not deleted:
1894 primary, uris = butler.datastore.getURIs(ref)
1895 if primary:
1896 primary.remove()
1897 for uri in uris.values():
1898 uri.remove()
1899 n_expected -= 1
1900 deleted.add(ref)
1902 # Remove the datastore record.
1903 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
1905 if index < 2:
1906 source_refs.append(ref)
1907 if ref not in deleted:
1908 new_metric = butler.get(ref.unresolved(), collections=run)
1909 self.assertEqual(new_metric, metric)
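# Bookkeeping check for the loop above (arithmetic, not code): 30
# exposures split by i % 3 give 10 datasets per run; keeping index < 2
# retains run1 + run2 = 20 refs, matching n_expected before any file
# deletions reduce it.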
1911 # Create some bad dataset types to ensure we check for inconsistent
1912 # definitions.
1913 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList")
1914 for datasetTypeName in datasetTypeNames:
1915 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass)
1916 self.target_butler.registry.registerDatasetType(datasetType)
1917 with self.assertRaises(ConflictingDefinitionError) as cm:
1918 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map)
1919 self.assertIn("dataset type differs", str(cm.exception))
1921 # And remove the bad definitions.
1922 for datasetTypeName in datasetTypeNames:
1923 self.target_butler.registry.removeDatasetType(datasetTypeName)
1925 # Transfer without creating dataset types should fail.
1926 with self.assertRaises(KeyError):
1927 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map)
1929 # Transfer without creating dimensions should fail.
1930 with self.assertRaises(ConflictingDefinitionError) as cm:
1931 self.target_butler.transfer_from(
1932 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True
1933 )
1934 self.assertIn("dimension", str(cm.exception))
1936 # The failed transfer above leaves the registry in an inconsistent
1937 # state because the run is created but then rolled back without
1938 # the collection cache being cleared. For now, force a refresh.
1939 # This can be removed with DM-35498.
1940 self.target_butler.registry.refresh()
1942 # Now transfer them to the second butler, including dimensions.
1943 with self.assertLogs(level=logging.DEBUG) as cm:
1944 transferred = self.target_butler.transfer_from(
1945 self.source_butler,
1946 source_refs,
1947 id_gen_map=id_gen_map,
1948 register_dataset_types=True,
1949 transfer_dimensions=True,
1950 )
1951 self.assertEqual(len(transferred), n_expected)
1952 log_output = ";".join(cm.output)
1953 self.assertIn("found in datastore for chunk", log_output)
1954 self.assertIn("Creating output run", log_output)
1956 # Do the transfer twice to ensure that it will do nothing extra.
1957 # Only do this if purge=True because it does not work for integer
1958 # dataset IDs.
1959 if purge:
1960 # This should not need to register dataset types.
1961 transferred = self.target_butler.transfer_from(
1962 self.source_butler, source_refs, id_gen_map=id_gen_map
1963 )
1964 self.assertEqual(len(transferred), n_expected)
1966 # Also do an explicit low-level transfer to trigger some
1967 # edge cases.
1968 with self.assertLogs(level=logging.DEBUG) as cm:
1969 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs)
1970 log_output = ";".join(cm.output)
1971 self.assertIn("no file artifacts exist", log_output)
1973 with self.assertRaises(TypeError):
1974 self.target_butler.datastore.transfer_from(self.source_butler, source_refs)
1976 with self.assertRaises(ValueError):
1977 self.target_butler.datastore.transfer_from(
1978 self.source_butler.datastore, source_refs, transfer="split"
1979 )
1981 # Now try to get the same refs from the new butler.
1982 for ref in source_refs:
1983 if ref not in deleted:
1984 unresolved_ref = ref.unresolved()
1985 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run)
1986 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run)
1987 self.assertEqual(new_metric, old_metric)
1989 # Now prune the run2 collection and create a CHAINED collection in
1990 # its place. This should block the transfer.
1991 self.target_butler.removeRuns(["run2"], unstore=True)
1992 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED)
1993 with self.assertRaises(CollectionTypeError):
1994 # Re-importing the run1 datasets can be problematic if they
1995 # use integer IDs so filter those out.
1996 to_transfer = [ref for ref in source_refs if ref.run == "run2"]
1997 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map)
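# The canonical transfer call exercised throughout this method, for
# reference (arguments as used above):
#
#     transferred = target_butler.transfer_from(
#         source_butler,
#         source_refs,
#         register_dataset_types=True,   # create missing dataset types
#         transfer_dimensions=True,      # copy dimension records too
#     )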
2000if __name__ == "__main__":
2001 unittest.main()