# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""
from __future__ import annotations

import gc
import json
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock  # Imported explicitly; unittest.mock.patch.dict is used below.
import uuid
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, cast

try:
    import boto3
    import botocore
    from moto import mock_s3  # type: ignore[import]
except ImportError:
    boto3 = None

    def mock_s3(cls):  # type: ignore[no-untyped-def]
        """No-op decorator in case moto mock_s3 can not be imported."""
        return cls


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    ButlerRepoIndex,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetExistence,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    import types

    from lsst.daf.butler import Datastore, DimensionGraph, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def clean_environment() -> None:
    """Remove external environment variables that affect the tests."""
    for k in (
        "DAF_BUTLER_REPOSITORY_INDEX",
        "S3_ENDPOINT_URL",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_SHARED_CREDENTIALS_FILE",
    ):
        os.environ.pop(k, None)


def makeExampleMetrics() -> MetricsExample:
    """Return example dataset suitable for tests."""
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )
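

# Note on makeExampleMetrics(): the three positional arguments populate the
# "summary", "output", and "data" attributes that the put/get tests below read
# back as components (plus "counter", a derived component whose value is the
# length of "data"). This mapping is inferred from how the tests use the
# object, not restated from the MetricsExample definition itself.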


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in any other test
    cases.
    """

    def testSearchPath(self) -> None:
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests(TestCaseMixin):
    """Helper methods for running a suite of put/get tests from different
    butler configurations.
    """

    root: str
    default_run = "ingésτ😺"
    storageClassFactory: StorageClassFactory
    configFile: str
    tmpConfigFile: str

    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGraph, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType
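
    # Typical use, as in create_butler() below (a sketch; the names are the
    # ones used throughout this module):
    #     datasetType = self.addDatasetType("test_metric", dimensions, storageClass, butler.registry)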

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(
        self,
        butler: Butler,
        datasetRef: DatasetRef,
        components: tuple[str, ...],
        reference: Any,
        collections: Any = None,
    ) -> None:
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[Butler, DatasetType]:
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run})

        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType
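
    # Note on the insertion order above: the "visit" records reference the
    # instrument, physical_filter, and visit_system records, so those must
    # generally be inserted first; dimension records are validated against
    # their required dependencies (an inference from the dimension universe,
    # not something this test asserts directly).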

    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Butler:
        # Datasets are written either to the default run or to a
        # per-attempt run collection created below, and are looked up
        # from the collection they were written to.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]
        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                kwargs: dict[str, Any] = {}
                if not isinstance(args[0], DatasetRef):  # type: ignore
                    kwargs["run"] = this_run
                ref = butler.put(metric, *args, **kwargs)
                self.assertIsInstance(ref, DatasetRef)

                # Test get with a resolved DatasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get by dataset type name and data ID
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                # Can the artifacts themselves be retrieved?
                if not butler.datastore.isEphemeral:
                    root_uri = ResourcePath(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)
                            assert path_in_destination is not None

                            # when path is not preserved there should not be
                            # any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                kwargs = {"collections": this_run}
                if isinstance(args[0], DatasetRef):
                    kwargs = {}  # Prevent warning from being issued.
                self.assertFalse(butler.exists(*args, **kwargs))
                # get() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know they are
                # empty
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Create DatasetRef for put using default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        assert metric.data is not None  # for mypy
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises(FileNotFoundError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add the same ref again, so we can check that duplicate put fails.
        ref = butler.put(metric, datasetType, dataId)

        # Repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # In case of resolved ref this write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present
        return butler
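
    # The round trip exercised above, reduced to its core (a sketch; the
    # names are this module's):
    #     butler = Butler(self.tmpConfigFile, run=run)
    #     ref = butler.put(metric, datasetType, dataId)  # or put(metric, datasetTypeName, dataId)
    #     assert butler.get(ref) == metric
    #     butler.pruneDatasets([ref], purge=True, unstore=True)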

    def testDeferredCollectionPassing(self) -> None:
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to get the dataset without any collection raises
        # CollectionError, and exists() reports it as missing.
        self.assertFalse(butler.exists(datasetType, dataId))
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
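
    # In short: with no default run configured, put() must be told where to
    # write and get() where to look (a sketch using the names above):
    #     butler = Butler(self.tmpConfigFile, writeable=True)
    #     butler.registry.registerRun("deferred")
    #     ref = butler.put(metric, datasetType, dataId, run="deferred")
    #     metric_out = butler.get(datasetType, dataId, collections=["deferred"])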


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True
    validationCanFail: bool
    fullConfigKey: str | None
    registryStr: str | None
    datastoreName: list[str] | None
    datastoreStr: list[str]

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self) -> None:
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"file://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"):
                        Butler("bad_label")
                    with self.assertRaises(FileNotFoundError):
                        # Should ignore aliases.
                        Butler(ResourcePath("label", forceAbsolute=False))
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(
                        Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False)
                    )
                    self.assertIn("not known to", str(cm.exception))
                    # Should report no failure.
                    self.assertEqual(ButlerRepoIndex.get_failure_reason(), "")
        with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
            # Now with empty configuration.
            butler_index = Config()
            butler_index.dumpToUri(temp_file)
            with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"):
                    Butler("label")
        with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
            # Now with bad contents.
            with open(temp_file.ospath, "w") as fh:
                print("'", file=fh)
            with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"):
                    Butler("label")
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())

            with self.assertRaisesRegex(FileNotFoundError, "index file not found"):
                Butler("label")

        # Check that we can create Butler when the alias file is not found.
        butler = Butler(self.tmpConfigFile, writeable=False)
        self.assertIsInstance(butler, Butler)
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())
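
    # The index file that DAF_BUTLER_REPOSITORY_INDEX points at is a plain
    # mapping of labels to repo URIs, e.g. (hypothetical paths):
    #     label: /some/repo/butler.yaml
    #     bad_label: file://bucket/not_real.yaml
    # after which Butler("label") and Butler.get_repo_uri("label") resolve
    # through it, as the test above exercises.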

    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )
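
    # The override in one line (names from this test; "MetricsConversion" is
    # a storage class defined in the test configuration loaded in setUpClass):
    #     model = butler.get(ref, storageClass="MetricsConversion")
    # The same keyword works on getDeferred() and on the deferred handle's
    # get(), as exercised above.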

    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the wrong dataset type definition, this time
        # passing the dataset type and data ID rather than a resolved ref.
        # This should be consistent with the ref-based get() behavior and
        # return the type of the supplied DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            # For first ingest use copy.
            butler.ingest(*datasets, transfer="copy", record_validation_info=False)

            # Now try to ingest again in "execution butler" mode where
            # the registry entries exist but the datastore does not have
            # the files. We also need to strip the dimension records to ensure
            # that they will be re-added by the ingest.
            ref = datasets[0].refs[0]
            datasets[0].refs = [
                cast(
                    DatasetRef,
                    butler.registry.findDataset(ref.datasetType, dataId=ref.dataId, collections=ref.run),
                )
                for ref in datasets[0].refs
            ]
            all_refs = []
            for dataset in datasets:
                refs = []
                for ref in dataset.refs:
                    # Create a dict from the dataId to drop the records.
                    new_data_id = {str(k): v for k, v in ref.dataId.items()}
                    new_ref = butler.registry.findDataset(ref.datasetType, new_data_id, collections=ref.run)
                    assert new_ref is not None
                    self.assertFalse(new_ref.dataId.hasRecords())
                    refs.append(new_ref)
                dataset.refs = refs
                all_refs.extend(dataset.refs)
            butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False)

            # Use move mode to test that the file is deleted. Also
            # disable recording of file size.
            butler.ingest(*datasets, transfer="move", record_validation_info=False)

            # Check that every ref now has records.
            for dataset in datasets:
                for ref in dataset.refs:
                    self.assertTrue(ref.dataId.hasRecords())

            # Ensure that the file has disappeared.
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])  # type: ignore[attr-defined]
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.exists(datasetTypeName, dataId1))
        self.assertTrue(butler.exists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets
        datasets = []
        butler.ingest(*datasets)
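
    # Ingest in miniature (an external file brought into the repo; names from
    # this test):
    #     datasets = [FileDataset(path=metricFile, refs=[refIn], formatter=formatter)]
    #     butler.ingest(*datasets, transfer="copy")  # or "move", as above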

    def testPickle(self) -> None:
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry, entries are not created
        # for its components, but querying can still return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get with a resolved DatasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get by dataset type name and data ID
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)
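
    # The rollback pattern under test, in brief (TransactionTestError is this
    # module's purpose-built exception):
    #     with butler.transaction():
    #         butler.put(...)              # registry and datastore writes
    #         raise TransactionTestError   # everything above is rolled back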

    def testMakeRepo(self) -> None:
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self) -> None:
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)
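
    # The rewrite being tested: a put() keyed on alternate identifiers
    # (seq_num + day_obs) resolves to the primary "exposure" dimension value
    # via the records inserted above:
    #     dataId = {"seq_num": 3, "day_obs": 20210530, "instrument": "DummyCamComp", "physical_filter": "d-r"}
    #     ref = butler.put(metric, "random_data", dataId=dataId)
    #     ref.dataId["exposure"]  # -> 3, because exposure id == seq_num here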


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool:
        """Check if file exists at a given path (relative to root).

        The testPutTemplates test verifies the actual physical existence of
        the files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)
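
    # FileTemplate syntax exercised above: "{visit.name}" pulls a field from
    # the visit dimension record, "{id}" the dataset ID, and a trailing ":?"
    # appears to mark the field optional (a missing value is logged and
    # dropped from the path instead of raising KeyError, as the two
    # "visit.namex" cases demonstrate).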

    def testImportExport(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass: StorageClass) -> None:
        """Test exporting and importing.

        This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")

        # Test that we must have a file extension.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump", directory=".") as export:
                pass

        # Test that unknown format is not allowed.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump.fits", directory=".") as export:
                pass

        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile) as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.exists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.dimensions["skymap"].RecordClass(**skymapRecord)],
                )
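
    # Export/import in miniature (a sketch; paths are hypothetical):
    #     with exportButler.export(filename="exports.yaml", directory=exportDir, transfer="auto") as e:
    #         e.saveDatasets(datasets)
    #     # ...then, on the importing side, via the CLI implementation:
    #     script.butlerImport(importDir, export_file=f, directory=exportDir,
    #                         transfer="auto", skip_dimensions=None)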
1384 def testRemoveRuns(self) -> None:
1385 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1386 butler = Butler(self.tmpConfigFile, writeable=True)
1387 # Load registry data with dimensions to hang datasets off of.
1388 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
1389 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
1390 # Add some RUN-type collection.
1391 run1 = "run1"
1392 butler.registry.registerRun(run1)
1393 run2 = "run2"
1394 butler.registry.registerRun(run2)
1395 # put a dataset in each
1396 metric = makeExampleMetrics()
1397 dimensions = butler.dimensions.extract(["instrument", "physical_filter"])
1398 datasetType = self.addDatasetType(
1399 "prune_collections_test_dataset", dimensions, storageClass, butler.registry
1400 )
1401 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
1402 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
1403 uri1 = butler.getURI(ref1)
1404 uri2 = butler.getURI(ref2)
1406 with self.assertRaises(OrphanedRecordError):
1407 butler.registry.removeDatasetType(datasetType.name)
1409 # Remove from both runs with different values for unstore.
1410 butler.removeRuns([run1], unstore=True)
1411 butler.removeRuns([run2], unstore=False)
1412 # There should be nothing in the registry for either one, and the
1413 # datastore should not think either exists.
1414 with self.assertRaises(MissingCollectionError):
1415 butler.registry.getCollectionType(run1)
1416 with self.assertRaises(MissingCollectionError):
1417 butler.registry.getCollectionType(run2)
1418 self.assertFalse(butler.datastore.exists(ref1))
1419 self.assertFalse(butler.datastore.exists(ref2))
1420 # The ref we unstored should be gone according to the URI, but the
1421 # one we forgot should still be around.
1422 self.assertFalse(uri1.exists())
1423 self.assertTrue(uri2.exists())
1425 # Now that the collections have been pruned, we can remove the
1426 # dataset type.
1427 butler.registry.removeDatasetType(datasetType.name)
1429 with self.assertLogs("lsst.daf.butler.registries", "INFO") as cm:
1430 butler.registry.removeDatasetType(("test*", "test*"))
1431 self.assertIn("not defined", "\n".join(cm.output))
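# A minimal, hedged sketch (not part of the test suite; the helper name and
# paths are hypothetical) distilling the export/import round trip exercised
# by the export test above.
def _sketch_export_import(source: Butler, export_dir: str, target_repo: str) -> None:
    """Copy every dataset from ``source`` into a new repo at ``target_repo``."""
    refs = list(source.registry.queryDatasets(..., collections=...))
    export_file = os.path.join(export_dir, "exports.yaml")
    with source.export(filename=export_file, directory=export_dir, transfer="auto") as export:
        export.saveDatasets(refs)
    target = Butler(Butler.makeRepo(target_repo), writeable=True)
    target.import_(filename=export_file, directory=export_dir, transfer="auto")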
1434class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1435 """PosixDatastore specialization of a butler"""
1437 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1438 fullConfigKey: str | None = ".datastore.formatters"
1439 validationCanFail = True
1440 datastoreStr = ["/tmp"]
1441 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1442 registryStr = "/gen3.sqlite3"
1444 def testPathConstructor(self) -> None:
1445 """Independent test of constructor using PathLike."""
1446 butler = Butler(self.tmpConfigFile, run=self.default_run)
1447 self.assertIsInstance(butler, Butler)
1449 # And again with a Path object with the butler yaml
1450 path = pathlib.Path(self.tmpConfigFile)
1451 butler = Butler(path, writeable=False)
1452 self.assertIsInstance(butler, Butler)
1454 # And again with a Path object without the butler yaml
1455 # (making sure we skip it if the tmp config doesn't end
1456 # in butler.yaml -- which is the case for a subclass)
1457 if self.tmpConfigFile.endswith("butler.yaml"):
1458 path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
1459 butler = Butler(path, writeable=False)
1460 self.assertIsInstance(butler, Butler)
1462 def testExportTransferCopy(self) -> None:
1463 """Test local export using all transfer modes"""
1464 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1465 exportButler = self.runPutGetTest(storageClass, "test_metric")
1466 # Test that the repo actually has at least one dataset.
1467 datasets = list(exportButler.registry.queryDatasets(..., collections=...))
1468 self.assertGreater(len(datasets), 0)
1469 uris = [exportButler.getURI(d) for d in datasets]
1470 assert isinstance(exportButler.datastore, FileDatastore)
1471 datastoreRoot = exportButler.datastore.root
1473 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]
1475 for path in pathsInStore:
1476 # Assume a local file system.
1477 assert path is not None
1478 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}")
1480 for transfer in ("copy", "link", "symlink", "relsymlink"):
1481 with safeTestTempDir(TESTDIR) as exportDir:
1482 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export:
1483 export.saveDatasets(datasets)
1484 for path in pathsInStore:
1485 assert path is not None
1486 self.assertTrue(
1487 self.checkFileExists(exportDir, path),
1488 f"Check that mode {transfer} exported files",
1489 )
1491 def testPruneDatasets(self) -> None:
1492 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1493 butler = Butler(self.tmpConfigFile, writeable=True)
1494 assert isinstance(butler.datastore, FileDatastore)
1495 # Load registry data with dimensions to hang datasets off of.
1496 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry"))
1497 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
1498 # Add some RUN-type collections.
1499 run1 = "run1"
1500 butler.registry.registerRun(run1)
1501 run2 = "run2"
1502 butler.registry.registerRun(run2)
1503 # Put some datasets. ref1 and ref2 have the same data ID and are in
1504 # different runs; ref3 has a different data ID.
1505 metric = makeExampleMetrics()
1506 dimensions = butler.dimensions.extract(["instrument", "physical_filter"])
1507 datasetType = self.addDatasetType(
1508 "prune_collections_test_dataset", dimensions, storageClass, butler.registry
1509 )
1510 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
1511 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
1512 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
1514 many_stored = butler.stored_many([ref1, ref2, ref3])
1515 for ref, stored in many_stored.items():
1516 self.assertTrue(stored, f"Ref {ref} should be stored")
1518 many_exists = butler._exists_many([ref1, ref2, ref3])
1519 for ref, exists in many_exists.items():
1520 self.assertTrue(exists, f"Checking ref {ref} exists.")
1521 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored")
1523 # Simple prune.
1524 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
1525 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1))
1527 many_stored = butler.stored_many([ref1, ref2, ref3])
1528 for ref, stored in many_stored.items():
1529 self.assertFalse(stored, f"Ref {ref} should not be stored")
1531 many_exists = butler._exists_many([ref1, ref2, ref3])
1532 for ref, exists in many_exists.items():
1533 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored")
1535 # Put data back.
1536 ref1_new = butler.put(metric, ref1)
1537 self.assertEqual(ref1_new, ref1) # Reuses original ID.
1538 ref2 = butler.put(metric, ref2)
1540 many_stored = butler.stored_many([ref1, ref2, ref3])
1541 self.assertTrue(many_stored[ref1])
1542 self.assertTrue(many_stored[ref2])
1543 self.assertFalse(many_stored[ref3])
1545 ref3 = butler.put(metric, ref3)
1547 many_exists = butler._exists_many([ref1, ref2, ref3])
1548 for ref, exists in many_exists.items():
1549 self.assertTrue(exists, f"Ref {ref} should be stored")
1551 # Clear out the datasets from registry and start again.
1552 refs = [ref1, ref2, ref3]
1553 butler.pruneDatasets(refs, purge=True, unstore=True)
1554 for ref in refs:
1555 butler.put(metric, ref)
1557 # Test different forms of file availability.
1558 # Need to be in a state where:
1559 # - one ref just has registry record.
1560 # - one ref has a missing file but a datastore record.
1561 # - one ref has a missing datastore record but file is there.
1562 # - one ref does not exist anywhere.
1563 # Do not need to test a ref that has everything since that is tested
1564 # above.
1565 ref0 = DatasetRef(
1566 datasetType,
1567 DataCoordinate.standardize(
1568 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions
1569 ),
1570 run=run1,
1571 )
1573 # Delete from datastore and retain in Registry.
1574 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False)
1576 # File has been removed.
1577 uri2 = butler.datastore.getURI(ref2)
1578 uri2.remove()
1580 # Datastore has lost track.
1581 butler.datastore.forget([ref3])
1583 # First test with a standard butler.
1584 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
1585 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1586 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED)
1587 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
1588 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED)
1590 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False)
1591 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1592 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED)
1593 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN)
1594 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED)
1595 self.assertTrue(exists_many[ref2])
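# For reference, inferred from the assertions in this test (the exact
# flag algebra lives in DatasetExistence): with full_check=False the
# datastore does not verify the artifact on disk, so the _ASSUMED flag
# is OR-ed in for recognized refs; KNOWN is then the composite
# RECORDED | DATASTORE | _ASSUMED, while VERIFIED (seen above for fully
# present refs with full_check=True) includes _ARTIFACT instead.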
1597 # Check that per-ref query gives the same answer as many query.
1598 for ref, exists in exists_many.items():
1599 self.assertEqual(butler.exists(ref, full_check=False), exists)
1601 # Test again with a trusting butler.
1602 butler.datastore.trustGetRequest = True
1603 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
1604 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1605 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED)
1606 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
1607 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT)
1609 # Check that per-ref query gives the same answer as many query.
1610 for ref, exists in exists_many.items():
1611 self.assertEqual(butler.exists(ref, full_check=True), exists)
1613 # Create a ref that unexpectedly reuses the UUID of an existing ref
1614 # but is otherwise not the same dataset.
1615 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id)
1616 with self.assertRaises(ValueError):
1617 butler.exists(ref_bad)
1619 # Create a ref that has a compatible storage class.
1620 ref_compat = ref2.overrideStorageClass("StructuredDataDict")
1621 exists = butler.exists(ref_compat)
1622 self.assertEqual(exists, exists_many[ref2])
1624 # Remove everything and start from scratch.
1625 butler.datastore.trustGetRequest = False
1626 butler.pruneDatasets(refs, purge=True, unstore=True)
1627 for ref in refs:
1628 butler.put(metric, ref)
1630 # These tests mess directly with the trash table and can leave the
1631 # datastore in an odd state. Do them at the end.
1632 # Check that in normal mode, deleting the record will lead to
1633 # trash not touching the file.
1634 uri1 = butler.datastore.getURI(ref1)
1635 butler.datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table
1636 butler.datastore.forget([ref1])
1637 butler.datastore.trash(ref1)
1638 butler.datastore.emptyTrash()
1639 self.assertTrue(uri1.exists())
1640 uri1.remove() # Clean it up.
1642 # Simulate execution butler setup by deleting the datastore
1643 # record but keeping the file around and trusting.
1644 butler.datastore.trustGetRequest = True
1645 uri2 = butler.datastore.getURI(ref2)
1646 uri3 = butler.datastore.getURI(ref3)
1647 self.assertTrue(uri2.exists())
1648 self.assertTrue(uri3.exists())
1650 # Remove the datastore record.
1651 butler.datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table
1652 butler.datastore.forget([ref2])
1653 self.assertTrue(uri2.exists())
1654 butler.datastore.trash([ref2, ref3])
1655 # Immediate removal of the ref2 file.
1656 self.assertFalse(uri2.exists())
1657 # But ref3 has to wait for the trash to be emptied.
1658 self.assertTrue(uri3.exists())
1659 butler.datastore.emptyTrash()
1660 self.assertFalse(uri3.exists())
1662 # Clear out the datasets from registry.
1663 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
1665 def testPytypeCoercion(self) -> None:
1666 """Test python type coercion on Butler.get and put."""
1667 # Store some data with the normal example storage class.
1668 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1669 datasetTypeName = "test_metric"
1670 butler = self.runPutGetTest(storageClass, datasetTypeName)
1672 dataId = {"instrument": "DummyCamComp", "visit": 423}
1673 metric = butler.get(datasetTypeName, dataId=dataId)
1674 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample")
1676 datasetType_ori = butler.registry.getDatasetType(datasetTypeName)
1677 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents")
1679 # Now need to hack the registry dataset type definition.
1680 # There is no API for this.
1681 assert isinstance(butler.registry, SqlRegistry)
1682 manager = butler.registry._managers.datasets
1683 assert hasattr(manager, "_db") and hasattr(manager, "_static")
1684 manager._db.update(
1685 manager._static.dataset_type,
1686 {"name": datasetTypeName},
1687 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"},
1688 )
1690 # Force reset of dataset type cache
1691 butler.registry.refresh()
1693 datasetType_new = butler.registry.getDatasetType(datasetTypeName)
1694 self.assertEqual(datasetType_new.name, datasetType_ori.name)
1695 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel")
1697 metric_model = butler.get(datasetTypeName, dataId=dataId)
1698 self.assertNotEqual(type(metric_model), type(metric))
1699 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel")
1701 # Put the model and read it back to show that everything now
1702 # works as normal.
1703 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424)
1704 metric_model_new = butler.get(metric_ref)
1705 self.assertEqual(metric_model_new, metric_model)
1707 # Hack the storage class again to something that will make the get
1708 # fail because no conversion is possible.
1709 manager._db.update(
1710 manager._static.dataset_type,
1711 {"name": datasetTypeName},
1712 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"},
1713 )
1714 butler.registry.refresh()
1716 with self.assertRaises(ValueError):
1717 butler.get(datasetTypeName, dataId=dataId)
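# A hedged sketch (helper name hypothetical) of the storage-class override
# pattern used in testPruneDatasets above: a ref can be re-read through a
# compatible storage class, here assuming the stored class is convertible
# to "StructuredDataDict".
def _sketch_storage_class_override(butler: Butler, ref: DatasetRef) -> Any:
    ref_dict = ref.overrideStorageClass("StructuredDataDict")
    return butler.get(ref_dict)  # The same dataset, coerced to its dict view.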
1720@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
1721class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1722 """PosixDatastore specialization of a butler using Postgres"""
1724 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1725 fullConfigKey = ".datastore.formatters"
1726 validationCanFail = True
1727 datastoreStr = ["/tmp"]
1728 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1729 registryStr = "PostgreSQL@test"
1730 postgresql: Any
1732 @staticmethod
1733 def _handler(postgresql: Any) -> None:
1734 engine = sqlalchemy.engine.create_engine(postgresql.url())
1735 with engine.begin() as connection:
1736 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))
1738 @classmethod
1739 def setUpClass(cls) -> None:
1740 # Create the postgres test server.
1741 cls.postgresql = testing.postgresql.PostgresqlFactory(
1742 cache_initialized_db=True, on_initialized=cls._handler
1743 )
1744 super().setUpClass()
1746 @classmethod
1747 def tearDownClass(cls) -> None:
1748 # Clean up any lingering SQLAlchemy engines/connections
1749 # so they're closed before we shut down the server.
1750 gc.collect()
1751 cls.postgresql.clear_cache()
1752 super().tearDownClass()
1754 def setUp(self) -> None:
1755 self.server = self.postgresql()
1757 # Need to add a registry section to the config.
1758 self._temp_config = False
1759 config = Config(self.configFile)
1760 config["registry", "db"] = self.server.url()
1761 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh:
1762 config.dump(fh)
1763 self.configFile = fh.name
1764 self._temp_config = True
1765 super().setUp()
1767 def tearDown(self) -> None:
1768 self.server.stop()
1769 if self._temp_config and os.path.exists(self.configFile):
1770 os.remove(self.configFile)
1771 super().tearDown()
1773 def testMakeRepo(self) -> None:
1774 # The base class test assumes that it's using SQLite and that
1775 # the config file is acceptable to SQLite.
1776 raise unittest.SkipTest("Postgres config is not compatible with this test.")
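# For illustration, a hedged sketch (helper name and URL hypothetical) of
# the config rewrite performed in setUp above: any butler config can be
# pointed at an external database by overriding the "registry.db" key
# before the repo is created.
def _sketch_external_registry_config(base_config_file: str, db_url: str) -> Config:
    config = Config(base_config_file)
    config["registry", "db"] = db_url  # e.g. "postgresql://user@host/dbname"
    return config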
1779class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
1780 """InMemoryDatastore specialization of a butler"""
1782 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
1783 fullConfigKey = None
1784 useTempRoot = False
1785 validationCanFail = False
1786 datastoreStr = ["datastore='InMemory"]
1787 datastoreName = ["InMemoryDatastore@"]
1788 registryStr = "/gen3.sqlite3"
1790 def testIngest(self) -> None:
1791 pass  # Ingest is not exercised for the in-memory datastore.
1794class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1795 """PosixDatastore specialization"""
1797 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
1798 fullConfigKey = ".datastore.datastores.1.formatters"
1799 validationCanFail = True
1800 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
1801 datastoreName = [
1802 "InMemoryDatastore@",
1803 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
1804 "SecondDatastore",
1805 ]
1806 registryStr = "/gen3.sqlite3"
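# For orientation, a hedged sketch of the shape of a chained-datastore
# configuration, inferred from fullConfigKey and datastoreName above; the
# authoritative schema is in config/basic/butler-chained.yaml:
#
#     datastore:
#       cls: lsst.daf.butler.datastores.chainedDatastore.ChainedDatastore
#       datastores:
#         - cls: lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore
#         - cls: lsst.daf.butler.datastores.fileDatastore.FileDatastore
#           root: <butlerRoot>/FileDatastore_1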
1809class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
1810 """Test that a yaml file in one location can refer to a root in another."""
1812 datastoreStr = ["dir1"]
1813 # Disable the makeRepo test since we are deliberately not using
1814 # butler.yaml as the config name.
1815 fullConfigKey = None
1817 def setUp(self) -> None:
1818 self.root = makeTestTempDir(TESTDIR)
1820 # Make a new repository in one place
1821 self.dir1 = os.path.join(self.root, "dir1")
1822 Butler.makeRepo(self.dir1, config=Config(self.configFile))
1824 # Move the yaml file to a different place and add a "root" entry.
1825 self.dir2 = os.path.join(self.root, "dir2")
1826 os.makedirs(self.dir2, exist_ok=True)
1827 configFile1 = os.path.join(self.dir1, "butler.yaml")
1828 config = Config(configFile1)
1829 config["root"] = self.dir1
1830 configFile2 = os.path.join(self.dir2, "butler2.yaml")
1831 config.dumpToUri(configFile2)
1832 os.remove(configFile1)
1833 self.tmpConfigFile = configFile2
1835 def testFileLocations(self) -> None:
1836 self.assertNotEqual(self.dir1, self.dir2)
1837 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
1838 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
1839 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
1842class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
1843 """Test that a config file created by makeRepo outside of repo works."""
1845 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1847 def setUp(self) -> None:
1848 self.root = makeTestTempDir(TESTDIR)
1849 self.root2 = makeTestTempDir(TESTDIR)
1851 self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
1852 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1854 def tearDown(self) -> None:
1855 if os.path.exists(self.root2):
1856 shutil.rmtree(self.root2, ignore_errors=True)
1857 super().tearDown()
1859 def testConfigExistence(self) -> None:
1860 c = Config(self.tmpConfigFile)
1861 uri_config = ResourcePath(c["root"])
1862 uri_expected = ResourcePath(self.root, forceDirectory=True)
1863 self.assertEqual(uri_config.geturl(), uri_expected.geturl())
1864 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")
1866 def testPutGet(self) -> None:
1867 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1868 self.runPutGetTest(storageClass, "test_metric")
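# A hedged sketch (helper name and paths hypothetical) of the pattern the
# ButlerMakeRepoOutfile* cases exercise: makeRepo can write the resulting
# config somewhere other than the repo root via ``outfile``, and a Butler
# can then be constructed directly from that external file.
def _sketch_outfile_repo(repo_root: str, config_path: str) -> Butler:
    Butler.makeRepo(repo_root, outfile=config_path)
    return Butler(config_path, writeable=False)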
1871class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
1872 """Test that a config file created by makeRepo outside of repo works."""
1874 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1876 def setUp(self) -> None:
1877 self.root = makeTestTempDir(TESTDIR)
1878 self.root2 = makeTestTempDir(TESTDIR)
1880 self.tmpConfigFile = self.root2
1881 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1883 def testConfigExistence(self) -> None:
1884 # Append the yaml file name, else the Config constructor does not
1885 # know the file type.
1886 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
1887 super().testConfigExistence()
1890class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
1891 """Test that a config file created by makeRepo outside of repo works."""
1893 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1895 def setUp(self) -> None:
1896 self.root = makeTestTempDir(TESTDIR)
1897 self.root2 = makeTestTempDir(TESTDIR)
1899 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl()
1900 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1903@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
1904class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1905 """S3Datastore specialization of a butler; an S3 storage Datastore +
1906 a local in-memory SqlRegistry.
1907 """
1909 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
1910 fullConfigKey = None
1911 validationCanFail = True
1913 bucketName = "anybucketname"
1914 """Name of the Bucket that will be used in the tests. The name is read from
1915 the config file used with the tests during set-up.
1916 """
1918 root = "butlerRoot/"
1919 """Root repository directory expected to be used in case useTempRoot=False.
1920 Otherwise the root is set to a 20 characters long randomly generated string
1921 during set-up.
1922 """
1924 datastoreStr = [f"datastore={root}"]
1925 """Contains all expected root locations in a format expected to be
1926 returned by Butler stringification.
1927 """
1929 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
1930 """The expected format of the S3 Datastore string."""
1932 registryStr = "/gen3.sqlite3"
1933 """Expected format of the Registry string."""
1935 mock_s3 = mock_s3()
1936 """The mocked s3 interface from moto."""
1938 def genRoot(self) -> str:
1939 """Return a random string of len 20 to serve as a root
1940 name for the temporary bucket repo.
1942 This is equivalent to tempfile.mkdtemp as this is what self.root
1943 becomes when useTempRoot is True.
1944 """
1945 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20))
1946 return rndstr + "/"
1948 def setUp(self) -> None:
1949 config = Config(self.configFile)
1950 uri = ResourcePath(config[".datastore.datastore.root"])
1951 self.bucketName = uri.netloc
1953 # Enable S3 mocking of tests.
1954 self.mock_s3.start()
1956 # Set up some fake credentials if they do not exist.
1957 self.usingDummyCredentials = setAwsEnvCredentials()
1959 if self.useTempRoot:
1960 self.root = self.genRoot()
1961 rooturi = f"s3://{self.bucketName}/{self.root}"
1962 config.update({"datastore": {"datastore": {"root": rooturi}}})
1964 # Need a local folder to store the registry database.
1965 self.reg_dir = makeTestTempDir(TESTDIR)
1966 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
1968 # Moto needs to know that we expect the bucket to exist
1969 # (the name used to be the class attribute bucketName).
1970 s3 = boto3.resource("s3")
1971 s3.create_bucket(Bucket=self.bucketName)
1973 self.datastoreStr = [f"datastore='{rooturi}'"]
1974 self.datastoreName = [f"FileDatastore@{rooturi}"]
1975 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
1976 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")
1978 def tearDown(self) -> None:
1979 s3 = boto3.resource("s3")
1980 bucket = s3.Bucket(self.bucketName)
1981 try:
1982 bucket.objects.all().delete()
1983 except botocore.exceptions.ClientError as e:
1984 if e.response["Error"]["Code"] == "404":
1985 # The key was not reachable; nothing to delete.
1986 pass
1987 else:
1988 raise
1990 bucket = s3.Bucket(self.bucketName)
1991 bucket.delete()
1993 # Stop the S3 mock.
1994 self.mock_s3.stop()
1996 # Unset any dummy credentials that may have been set.
1997 if self.usingDummyCredentials:
1998 unsetAwsEnvCredentials()
2000 if self.reg_dir is not None and os.path.exists(self.reg_dir):
2001 shutil.rmtree(self.reg_dir, ignore_errors=True)
2003 if self.useTempRoot and os.path.exists(self.root):
2004 shutil.rmtree(self.root, ignore_errors=True)
2006 super().tearDown()
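# A hedged sketch (helper name hypothetical) of the moto scaffolding used
# above, for a standalone mocked-S3 test: start the mock, install dummy
# credentials, create the bucket, and undo everything in reverse order.
# Only meaningful when boto3 and moto are importable.
def _sketch_mock_s3_bucket(bucket_name: str) -> None:
    mock = mock_s3()
    mock.start()
    using_dummy = setAwsEnvCredentials()
    try:
        boto3.resource("s3").create_bucket(Bucket=bucket_name)
    finally:
        if using_dummy:
            unsetAwsEnvCredentials()
        mock.stop()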
2009class PosixDatastoreTransfers(unittest.TestCase):
2010 """Test data transfers between butlers.
2012 Transfers are tested for different dataset ID managers: UUID to UUID
2013 and integer to integer. UUID to integer is not supported since we do
2014 not currently want to allow it. Integer to UUID is supported, with the
2015 caveat that a UUID4 will be generated, which would be incorrect for
2016 raw dataset types; the tests ignore that.
2017 """
2019 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
2020 storageClassFactory: StorageClassFactory
2022 @classmethod
2023 def setUpClass(cls) -> None:
2024 cls.storageClassFactory = StorageClassFactory()
2025 cls.storageClassFactory.addFromConfig(cls.configFile)
2027 def setUp(self) -> None:
2028 self.root = makeTestTempDir(TESTDIR)
2029 self.config = Config(self.configFile)
2031 def tearDown(self) -> None:
2032 removeTestTempDir(self.root)
2034 def create_butler(self, manager: str, label: str) -> Butler:
2035 config = Config(self.configFile)
2036 config["registry", "managers", "datasets"] = manager
2037 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True)
2039 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None:
2040 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
2041 if manager1 is None:
2042 manager1 = default
2043 if manager2 is None:
2044 manager2 = default
2045 self.source_butler = self.create_butler(manager1, "1")
2046 self.target_butler = self.create_butler(manager2, "2")
2048 def testTransferUuidToUuid(self) -> None:
2049 self.create_butlers()
2050 self.assertButlerTransfers()
2052 def _enable_trust(self, datastore: Datastore) -> None:
2053 if hasattr(datastore, "trustGetRequest"):
2054 datastore.trustGetRequest = True
2055 elif hasattr(datastore, "datastores"):
2056 for datastore in datastore.datastores:
2057 if hasattr(datastore, "trustGetRequest"):
2058 datastore.trustGetRequest = True
2060 def testTransferMissing(self) -> None:
2061 """Test transfers where datastore records are missing.
2063 This is how execution butler works.
2064 """
2065 self.create_butlers()
2067 # Configure the source butler to allow trust.
2068 self._enable_trust(self.source_butler.datastore)
2070 self.assertButlerTransfers(purge=True)
2072 def testTransferMissingDisassembly(self) -> None:
2073 """Test transfers where datastore records are missing.
2075 This is how execution butler works.
2076 """
2077 self.create_butlers()
2079 # Configure the source butler to allow trust.
2080 self._enable_trust(self.source_butler.datastore)
2082 # Test disassembly.
2083 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite")
2085 def testAbsoluteURITransferDirect(self) -> None:
2086 """Test transfer using an absolute URI."""
2087 self._absolute_transfer("auto")
2089 def testAbsoluteURITransferCopy(self) -> None:
2090 """Test transfer using an absolute URI."""
2091 self._absolute_transfer("copy")
2093 def _absolute_transfer(self, transfer: str) -> None:
2094 self.create_butlers()
2096 storageClassName = "StructuredData"
2097 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
2098 datasetTypeName = "random_data"
2099 run = "run1"
2100 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
2102 dimensions = self.source_butler.dimensions.extract(())
2103 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2104 self.source_butler.registry.registerDatasetType(datasetType)
2106 metrics = makeExampleMetrics()
2107 with ResourcePath.temporary_uri(suffix=".json") as temp:
2108 dataId = DataCoordinate.makeEmpty(self.source_butler.dimensions)
2109 source_refs = [DatasetRef(datasetType, dataId, run=run)]
2110 temp.write(json.dumps(metrics.exportAsDict()).encode())
2111 dataset = FileDataset(path=temp, refs=source_refs)
2112 self.source_butler.ingest(dataset, transfer="direct")
2114 self.target_butler.transfer_from(
2115 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer
2116 )
2118 uri = self.target_butler.getURI(dataset.refs[0])
2119 if transfer == "auto":
2120 self.assertEqual(uri, temp)
2121 else:
2122 self.assertNotEqual(uri, temp)
2124 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None:
2125 """Test that a run can be transferred to another butler."""
2126 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
2127 datasetTypeName = "random_data"
2129 # The test will create 3 collections, and we will want to transfer
2130 # two of those three.
2131 runs = ["run1", "run2", "other"]
2133 # Also want to use two different dataset types to ensure that
2134 # grouping works.
2135 datasetTypeNames = ["random_data", "random_data_2"]
2137 # Create the run collections in the source butler.
2138 for run in runs:
2139 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
2141 # Create dimensions in source butler.
2142 n_exposures = 30
2143 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
2144 self.source_butler.registry.insertDimensionData(
2145 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
2146 )
2147 self.source_butler.registry.insertDimensionData(
2148 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
2149 )
2151 for i in range(n_exposures):
2152 self.source_butler.registry.insertDimensionData(
2153 "exposure",
2154 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"},
2155 )
2157 # Create dataset types in the source butler.
2158 dimensions = self.source_butler.dimensions.extract(["instrument", "exposure"])
2159 for datasetTypeName in datasetTypeNames:
2160 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2161 self.source_butler.registry.registerDatasetType(datasetType)
2163 # Write a dataset to an unrelated run -- this will ensure that
2164 # we are rewriting integer dataset IDs in the target if necessary.
2165 # This is not relevant for UUIDs.
2166 run = "distraction"
2167 butler = Butler(butler=self.source_butler, run=run)
2168 butler.put(
2169 makeExampleMetrics(),
2170 datasetTypeName,
2171 exposure=1,
2172 instrument="DummyCamComp",
2173 physical_filter="d-r",
2174 )
2176 # Write some example metrics to the source
2177 butler = Butler(butler=self.source_butler)
2179 # Set of DatasetRefs that should be in the list of refs to transfer
2180 # but which will not be transferred.
2181 deleted: set[DatasetRef] = set()
2183 n_expected = 20 # Number of datasets expected to be transferred
2184 source_refs = []
2185 for i in range(n_exposures):
2186 # Put a third of the datasets into each collection; only retain
2187 # two thirds.
2188 index = i % 3
2189 run = runs[index]
2190 datasetTypeName = datasetTypeNames[i % 2]
2192 metric = MetricsExample(
2193 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)]
2194 )
2195 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"}
2196 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)
2198 # Remove the datastore record using the low-level API.
2199 if purge:
2200 # Remove records for a fraction.
2201 if index == 1:
2202 # For one of these delete the file as well.
2203 # This allows the "missing" code to filter the
2204 # file out.
2205 # Access the individual datastores.
2206 datastores = []
2207 if hasattr(butler.datastore, "datastores"):
2208 datastores.extend(butler.datastore.datastores)
2209 else:
2210 datastores.append(butler.datastore)
2212 if not deleted:
2213 # For a chained datastore we need to remove
2214 # files in each chain.
2215 for datastore in datastores:
2216 # The file might not be known to the datastore
2217 # if constraints are used.
2218 try:
2219 primary, uris = datastore.getURIs(ref)
2220 except FileNotFoundError:
2221 continue
2222 if primary:
2223 if primary.scheme != "mem":
2224 primary.remove()
2225 for uri in uris.values():
2226 if uri.scheme != "mem":
2227 uri.remove()
2228 n_expected -= 1
2229 deleted.add(ref)
2231 # Remove the datastore record.
2232 for datastore in datastores:
2233 if hasattr(datastore, "removeStoredItemInfo"):
2234 datastore.removeStoredItemInfo(ref)
2236 if index < 2:
2237 source_refs.append(ref)
2238 if ref not in deleted:
2239 new_metric = butler.get(ref)
2240 self.assertEqual(new_metric, metric)
2242 # Create some bad dataset types to ensure we check for inconsistent
2243 # definitions.
2244 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList")
2245 for datasetTypeName in datasetTypeNames:
2246 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass)
2247 self.target_butler.registry.registerDatasetType(datasetType)
2248 with self.assertRaises(ConflictingDefinitionError) as cm:
2249 self.target_butler.transfer_from(self.source_butler, source_refs)
2250 self.assertIn("dataset type differs", str(cm.exception))
2252 # And remove the bad definitions.
2253 for datasetTypeName in datasetTypeNames:
2254 self.target_butler.registry.removeDatasetType(datasetTypeName)
2256 # Transfer without creating dataset types should fail.
2257 with self.assertRaises(KeyError):
2258 self.target_butler.transfer_from(self.source_butler, source_refs)
2260 # Transfer without creating dimensions should fail.
2261 with self.assertRaises(ConflictingDefinitionError) as cm:
2262 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True)
2263 self.assertIn("dimension", str(cm.exception))
2265 # The failed transfer above leaves the registry in an inconsistent
2266 # state because the run is created but then rolled back without
2267 # the collection cache being cleared. For now, force a refresh.
2268 # This can be removed with DM-35498.
2269 self.target_butler.registry.refresh()
2271 # Now transfer them to the second butler, including dimensions.
2272 with self.assertLogs(level=logging.DEBUG) as log_cm:
2273 transferred = self.target_butler.transfer_from(
2274 self.source_butler,
2275 source_refs,
2276 register_dataset_types=True,
2277 transfer_dimensions=True,
2278 )
2279 self.assertEqual(len(transferred), n_expected)
2280 log_output = ";".join(log_cm.output)
2282 # A ChainedDatastore will use the in-memory datastore for mexists,
2283 # so we cannot rely on the mexists log message.
2284 self.assertIn("Number of datastore records found in source", log_output)
2285 self.assertIn("Creating output run", log_output)
2287 # Do the transfer twice to ensure that it will do nothing extra.
2288 # Only do this when purge=True because it does not work for integer
2289 # dataset IDs.
2290 if purge:
2291 # This should not need to register dataset types.
2292 transferred = self.target_butler.transfer_from(self.source_butler, source_refs)
2293 self.assertEqual(len(transferred), n_expected)
2295 # Also do an explicit low-level transfer to trigger some
2296 # edge cases.
2297 with self.assertLogs(level=logging.DEBUG) as log_cm:
2298 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs)
2299 log_output = ";".join(log_cm.output)
2300 self.assertIn("no file artifacts exist", log_output)
2302 with self.assertRaises((TypeError, AttributeError)):
2303 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) # type: ignore
2305 with self.assertRaises(ValueError):
2306 self.target_butler.datastore.transfer_from(
2307 self.source_butler.datastore, source_refs, transfer="split"
2308 )
2310 # Now try to get the same refs from the new butler.
2311 for ref in source_refs:
2312 if ref not in deleted:
2313 new_metric = self.target_butler.get(ref)
2314 old_metric = self.source_butler.get(ref)
2315 self.assertEqual(new_metric, old_metric)
2317 # Now remove the run2 collection and instead create a CHAINED
2318 # collection with the same name. This should block the transfer.
2319 self.target_butler.removeRuns(["run2"], unstore=True)
2320 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED)
2321 with self.assertRaises(CollectionTypeError):
2322 # Re-importing the run1 datasets can be problematic if they
2323 # use integer IDs so filter those out.
2324 to_transfer = [ref for ref in source_refs if ref.run == "run2"]
2325 self.target_butler.transfer_from(self.source_butler, to_transfer)
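# A distilled, hedged sketch (helper name hypothetical) of the happy path
# asserted above: transfer refs between butlers, letting the target
# register missing dataset types and copy dimension records across.
def _sketch_transfer(source: Butler, target: Butler, refs: list[DatasetRef]) -> int:
    transferred = target.transfer_from(
        source,
        refs,
        register_dataset_types=True,
        transfer_dimensions=True,
    )
    return len(transferred)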
2328class ChainedDatastoreTransfers(PosixDatastoreTransfers):
2329 """Test transfers using a chained datastore."""
2331 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
2334def setup_module(module: types.ModuleType) -> None:
2335 """Set up the module for pytest."""
2336 clean_environment()
2339if __name__ == "__main__":
2340 clean_environment()
2341 unittest.main()