Coverage for tests/test_butler.py: 13% (1295 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
24from __future__ import annotations
26import gc
27import json
28import logging
29import os
30import pathlib
31import pickle
32import posixpath
33import random
34import shutil
35import string
36import tempfile
37import unittest
38import uuid
39from collections.abc import Mapping
40from typing import TYPE_CHECKING, Any, cast

try:
    import boto3
    import botocore
    from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
    from moto import mock_s3  # type: ignore[import]
except ImportError:
    boto3 = None

    def mock_s3(*args: Any, **kwargs: Any) -> Any:  # type: ignore[no-untyped-def]
        """No-op decorator in case moto mock_s3 cannot be imported."""
        return None
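
# Note: S3-backed test cases elsewhere in this module are expected to check
# whether boto3 is None (or apply mock_s3 as a decorator) and skip themselves
# when the AWS SDK is unavailable.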

try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None
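
# As with boto3 above, PostgreSQL-backed registry tests are expected to check
# whether testing is None and skip themselves when the server is unavailable.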

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    ButlerRepoIndex,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetExistence,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    NullDatastore,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    import types

    from lsst.daf.butler import Datastore, DimensionGraph, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))
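# TESTDIR is used throughout to locate the config/ and data/ fixture
# directories that ship alongside this test file.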


def clean_environment() -> None:
    """Remove external environment variables that affect the tests."""
    for k in (
        "DAF_BUTLER_REPOSITORY_INDEX",
        "S3_ENDPOINT_URL",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_SHARED_CREDENTIALS_FILE",
    ):
        os.environ.pop(k, None)


def makeExampleMetrics() -> MetricsExample:
    """Return an example dataset suitable for tests."""
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests of ButlerConfig behavior that is not covered by any
    other test cases.
    """

    def testSearchPath(self) -> None:
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests(TestCaseMixin):
    """Helper class for running a suite of put/get tests from different
    butler configurations.
    """
    root: str
    default_run = "ingésτ😺"
    storageClassFactory: StorageClassFactory
    configFile: str
    tmpConfigFile: str

    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGraph, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(
        self,
        butler: Butler,
        datasetRef: DatasetRef,
        components: tuple[str, ...],
        reference: Any,
        collections: Any = None,
    ) -> None:
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[Butler, DatasetType]:
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run})

        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Butler:
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
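        # uuid.UUID(int=1) gives a fixed, predictable dataset ID for the test.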
        # Type annotation for the loop variable below.
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]
        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                kwargs: dict[str, Any] = {}
                if not isinstance(args[0], DatasetRef):  # type: ignore
                    kwargs["run"] = this_run
                ref = butler.put(metric, *args, **kwargs)
                self.assertIsInstance(ref, DatasetRef)

                # Test get() with the resolved DatasetRef (formerly getDirect).
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                # Can the artifacts themselves be retrieved?
                if not butler._datastore.isEphemeral:
                    root_uri = ResourcePath(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)
                            assert path_in_destination is not None

                            # when path is not preserved there should not be
                            # any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                kwargs = {"collections": this_run}
                if isinstance(args[0], DatasetRef):
                    kwargs = {}  # Prevent warning from being issued.
                self.assertFalse(butler.exists(*args, **kwargs))
                # get() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know they are
                # empty
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Create DatasetRef for put using default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Check that getDeferred fails with standalone ref.
        with self.assertRaises(LookupError):
            butler.getDeferred(refIn)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        assert metric.data is not None  # for mypy
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises(FileNotFoundError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add the same ref again, so we can check that duplicate put fails.
        ref = butler.put(metric, datasetType, dataId)

        # Repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # With a resolved ref, this write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present.
        return butler

    def testDeferredCollectionPassing(self) -> None:
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection fails: exists()
        # is False and get() raises CollectionError.
        self.assertFalse(butler.exists(datasetType, dataId))
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True
    validationCanFail: bool
    fullConfigKey: str | None
    registryStr: str | None
    datastoreName: list[str] | None
    datastoreStr: list[str]

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self) -> None:
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler._datastore, butler2._datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
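        # The index maps repository labels to butler config URIs; pointing the
        # DAF_BUTLER_REPOSITORY_INDEX environment variable at an index file
        # lets Butler("label") resolve the alias to a repository.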
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"file://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"):
                        Butler("bad_label")
                    with self.assertRaises(FileNotFoundError):
                        # Should ignore aliases.
                        Butler(ResourcePath("label", forceAbsolute=False))
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(
                        Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False)
                    )
                    self.assertIn("not known to", str(cm.exception))
                    # Should report no failure.
                    self.assertEqual(ButlerRepoIndex.get_failure_reason(), "")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with empty configuration.
                butler_index = Config()
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"):
                        Butler("label")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with bad contents.
                with open(temp_file.ospath, "w") as fh:
                    print("'", file=fh)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"):
                        Butler("label")
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())

            with self.assertRaisesRegex(FileNotFoundError, "index file not found"):
                Butler("label")

        # Check that we can create Butler when the alias file is not found.
        butler = Butler(self.tmpConfigFile, writeable=False)
        self.assertIsInstance(butler, Butler)
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler._datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler._datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the wrong dataset type definition, this time
        # passing the dataset type and data ID to get() rather than a
        # resolved ref. The behavior should be consistent with the ref-based
        # get() above and return the type of the supplied DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            # For first ingest use copy.
            butler.ingest(*datasets, transfer="copy", record_validation_info=False)

            # Now try to ingest again in "execution butler" mode where
            # the registry entries exist but the datastore does not have
            # the files. We also need to strip the dimension records to ensure
            # that they will be re-added by the ingest.
            ref = datasets[0].refs[0]
            datasets[0].refs = [
                cast(
                    DatasetRef,
                    butler.registry.findDataset(ref.datasetType, dataId=ref.dataId, collections=ref.run),
                )
                for ref in datasets[0].refs
            ]
            all_refs = []
            for dataset in datasets:
                refs = []
                for ref in dataset.refs:
                    # Create a dict from the dataId to drop the records.
                    new_data_id = {str(k): v for k, v in ref.dataId.items()}
                    new_ref = butler.registry.findDataset(ref.datasetType, new_data_id, collections=ref.run)
                    assert new_ref is not None
                    self.assertFalse(new_ref.dataId.hasRecords())
                    refs.append(new_ref)
                dataset.refs = refs
                all_refs.extend(dataset.refs)
            butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False)

            # Use move mode to test that the file is deleted. Also
            # disable recording of file size.
            butler.ingest(*datasets, transfer="move", record_validation_info=False)

            # Check that every ref now has records.
            for dataset in datasets:
                for ref in dataset.refs:
                    self.assertTrue(ref.dataId.hasRecords())

            # Ensure that the file has disappeared.
            self.assertFalse(tempFile.exists())

            # Check that the datastore recorded no file size.
            # Not all datastores can support this.
            try:
                infos = butler._datastore.getStoredItemsInfo(datasets[0].refs[0])  # type: ignore[attr-defined]
                self.assertEqual(infos[0].file_size, -1)
            except AttributeError:
                pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory datastores cannot
        # ingest files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.exists(datasetTypeName, dataId1))
        self.assertTrue(butler.exists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets
        datasets = []
        butler.ingest(*datasets)

    def testPickle(self) -> None:
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying dataset types can still
        # return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get() with the resolved ref (formerly getDirect)
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)

    def testMakeRepo(self) -> None:
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler._datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self) -> None:
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool:
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create three almost-identical DatasetTypes. metric1 and metric2
        # will use the default file template; metric3 is used below to test
        # a template that does not produce unique filenames.
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass: StorageClass) -> None:
        """Test exporting and importing.

        This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")

        # Test that we must have a file extension.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump", directory=".") as export:
                pass

        # Test that unknown format is not allowed.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump.fits", directory=".") as export:
                pass

        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
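            # Now import into a fresh repo and check that the datasets and
            # the skymap dimension record round-trip.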
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler.
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command-line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public API.
                with open(exportFile) as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.exists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.dimensions["skymap"].RecordClass(**skymapRecord)],
                )
1389 def testRemoveRuns(self) -> None:
1390 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1391 butler = Butler(self.tmpConfigFile, writeable=True)
1392 # Load registry data with dimensions to hang datasets off of.
1393 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
1394 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
1395 # Add some RUN-type collection.
1396 run1 = "run1"
1397 butler.registry.registerRun(run1)
1398 run2 = "run2"
1399 butler.registry.registerRun(run2)
1400 # put a dataset in each
1401 metric = makeExampleMetrics()
1402 dimensions = butler.dimensions.extract(["instrument", "physical_filter"])
1403 datasetType = self.addDatasetType(
1404 "prune_collections_test_dataset", dimensions, storageClass, butler.registry
1405 )
1406 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
1407 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
1408 uri1 = butler.getURI(ref1)
1409 uri2 = butler.getURI(ref2)
1411 with self.assertRaises(OrphanedRecordError):
1412 butler.registry.removeDatasetType(datasetType.name)
1414 # Remove from both runs with different values for unstore.
1415 butler.removeRuns([run1], unstore=True)
1416 butler.removeRuns([run2], unstore=False)
1417 # Should be nothing in registry for either one, and datastore should
1418 # not think either exists.
1419 with self.assertRaises(MissingCollectionError):
1420 butler.registry.getCollectionType(run1)
1421 with self.assertRaises(MissingCollectionError):
1422 butler.registry.getCollectionType(run2)
1423 self.assertFalse(butler.stored(ref1))
1424 self.assertFalse(butler.stored(ref2))
1425 # The ref we unstored should be gone according to the URI, but the
1426 # one we forgot should still be around.
1427 self.assertFalse(uri1.exists())
1428 self.assertTrue(uri2.exists())
1430 # Now that the collections have been pruned, we can remove the
1431 # dataset type.
1432 butler.registry.removeDatasetType(datasetType.name)
1434 with self.assertLogs("lsst.daf.butler.registries", "INFO") as cm:
1435 butler.registry.removeDatasetType(("test*", "test*"))
1436 self.assertIn("not defined", "\n".join(cm.output))
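# A minimal sketch of the removeRuns semantics exercised above, assuming a
# writeable butler with RUN collections "a" and "b":
#
#     butler.removeRuns(["a"], unstore=True)   # registry entries and file artifacts removed
#     butler.removeRuns(["b"], unstore=False)  # registry entries removed, files left in place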
1439class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1440 """PosixDatastore specialization of a butler"""
1442 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1443 fullConfigKey: str | None = ".datastore.formatters"
1444 validationCanFail = True
1445 datastoreStr = ["/tmp"]
1446 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1447 registryStr = "/gen3.sqlite3"
1449 def testPathConstructor(self) -> None:
1450 """Independent test of constructor using PathLike."""
1451 butler = Butler(self.tmpConfigFile, run=self.default_run)
1452 self.assertIsInstance(butler, Butler)
1454 # And again with a Path object with the butler yaml
1455 path = pathlib.Path(self.tmpConfigFile)
1456 butler = Butler(path, writeable=False)
1457 self.assertIsInstance(butler, Butler)
1459 # And again with a Path object without the butler yaml
1460 # (making sure we skip it if the tmp config doesn't end
1461 # in butler.yaml -- which is the case for a subclass)
1462 if self.tmpConfigFile.endswith("butler.yaml"):
1463 path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
1464 butler = Butler(path, writeable=False)
1465 self.assertIsInstance(butler, Butler)
1467 def testExportTransferCopy(self) -> None:
1468 """Test local export using all transfer modes"""
1469 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1470 exportButler = self.runPutGetTest(storageClass, "test_metric")
1471 # Test that the repo actually has at least one dataset.
1472 datasets = list(exportButler.registry.queryDatasets(..., collections=...))
1473 self.assertGreater(len(datasets), 0)
1474 uris = [exportButler.getURI(d) for d in datasets]
1475 assert isinstance(exportButler._datastore, FileDatastore)
1476 datastoreRoot = exportButler.get_datastore_roots()[exportButler.get_datastore_names()[0]]
1478 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]
1480 for path in pathsInStore:
1481 # Assume local file system
1482 assert path is not None
1483 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}")
1485 for transfer in ("copy", "link", "symlink", "relsymlink"):
1486 with safeTestTempDir(TESTDIR) as exportDir:
1487 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export:
1488 export.saveDatasets(datasets)
1489 for path in pathsInStore:
1490 assert path is not None
1491 self.assertTrue(
1492 self.checkFileExists(exportDir, path),
1493 f"Check that mode {transfer} exported files",
1494 )
1496 def testPruneDatasets(self) -> None:
1497 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1498 butler = Butler(self.tmpConfigFile, writeable=True)
1499 assert isinstance(butler._datastore, FileDatastore)
1500 # Load registry data with dimensions to hang datasets off of.
1501 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry"))
1502 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
1503 # Add some RUN-type collections.
1504 run1 = "run1"
1505 butler.registry.registerRun(run1)
1506 run2 = "run2"
1507 butler.registry.registerRun(run2)
1508 # Put some datasets. ref1 and ref2 have the same data ID but are in
1509 # different runs. ref3 has a different data ID.
1510 metric = makeExampleMetrics()
1511 dimensions = butler.dimensions.extract(["instrument", "physical_filter"])
1512 datasetType = self.addDatasetType(
1513 "prune_collections_test_dataset", dimensions, storageClass, butler.registry
1514 )
1515 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
1516 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
1517 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
1519 many_stored = butler.stored_many([ref1, ref2, ref3])
1520 for ref, stored in many_stored.items():
1521 self.assertTrue(stored, f"Ref {ref} should be stored")
1523 many_exists = butler._exists_many([ref1, ref2, ref3])
1524 for ref, exists in many_exists.items():
1525 self.assertTrue(exists, f"Checking ref {ref} exists.")
1526 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored")
1528 # Simple prune.
1529 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
1530 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1))
1532 many_stored = butler.stored_many([ref1, ref2, ref3])
1533 for ref, stored in many_stored.items():
1534 self.assertFalse(stored, f"Ref {ref} should not be stored")
1536 many_exists = butler._exists_many([ref1, ref2, ref3])
1537 for ref, exists in many_exists.items():
1538 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored")
1540 # Put data back.
1541 ref1_new = butler.put(metric, ref1)
1542 self.assertEqual(ref1_new, ref1) # Reuses original ID.
1543 ref2 = butler.put(metric, ref2)
1545 many_stored = butler.stored_many([ref1, ref2, ref3])
1546 self.assertTrue(many_stored[ref1])
1547 self.assertTrue(many_stored[ref2])
1548 self.assertFalse(many_stored[ref3])
1550 ref3 = butler.put(metric, ref3)
1552 many_exists = butler._exists_many([ref1, ref2, ref3])
1553 for ref, exists in many_exists.items():
1554 self.assertTrue(exists, f"Ref {ref} should be stored")
1556 # Clear out the datasets from registry and start again.
1557 refs = [ref1, ref2, ref3]
1558 butler.pruneDatasets(refs, purge=True, unstore=True)
1559 for ref in refs:
1560 butler.put(metric, ref)
1562 # Test different forms of file availability.
1563 # Need to be in a state where:
1564 # - one ref just has registry record.
1565 # - one ref has a missing file but a datastore record.
1566 # - one ref has a missing datastore record but file is there.
1567 # - one ref does not exist anywhere.
1568 # Do not need to test a ref that has everything since that is tested
1569 # above.
1570 ref0 = DatasetRef(
1571 datasetType,
1572 DataCoordinate.standardize(
1573 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions
1574 ),
1575 run=run1,
1576 )
1578 # Delete from datastore and retain in Registry.
1579 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False)
1581 # File has been removed.
1582 uri2 = butler.getURI(ref2)
1583 uri2.remove()
1585 # Datastore has lost track.
1586 butler._datastore.forget([ref3])
1588 # First test with a standard butler.
1589 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
1590 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1591 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED)
1592 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
1593 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED)
1595 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False)
1596 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1597 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED)
1598 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN)
1599 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED)
1600 self.assertTrue(exists_many[ref2])
1602 # Check that per-ref query gives the same answer as many query.
1603 for ref, exists in exists_many.items():
1604 self.assertEqual(butler.exists(ref, full_check=False), exists)
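# Flag algebra assumed by the assertions above (a sketch; see the
# DatasetExistence definition in daf_butler for the authoritative values):
#
#     DatasetExistence.KNOWN    == RECORDED | DATASTORE | _ASSUMED   # full_check=False
#     DatasetExistence.VERIFIED == RECORDED | DATASTORE | _ARTIFACT  # full_check=True
#
# which is why assertTrue(exists_many[ref2]) passes even though the file
# artifact itself is missing.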
1606 # Test again with a trusting butler.
1607 butler._datastore.trustGetRequest = True
1608 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
1609 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1610 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED)
1611 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
1612 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT)
1614 # Check that per-ref query gives the same answer as many query.
1615 for ref, exists in exists_many.items():
1616 self.assertEqual(butler.exists(ref, full_check=True), exists)
1618 # Create a ref that surprisingly has the UUID of an existing ref
1619 # but is not the same.
1620 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id)
1621 with self.assertRaises(ValueError):
1622 butler.exists(ref_bad)
1624 # Create a ref that has a compatible storage class.
1625 ref_compat = ref2.overrideStorageClass("StructuredDataDict")
1626 exists = butler.exists(ref_compat)
1627 self.assertEqual(exists, exists_many[ref2])
1629 # Remove everything and start from scratch.
1630 butler._datastore.trustGetRequest = False
1631 butler.pruneDatasets(refs, purge=True, unstore=True)
1632 for ref in refs:
1633 butler.put(metric, ref)
1635 # These tests mess directly with the trash table and can leave the
1636 # datastore in an odd state. Do them at the end.
1637 # Check that in normal mode, deleting the record will lead to
1638 # trash not touching the file.
1639 uri1 = butler.getURI(ref1)
1640 butler._datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table
1641 butler._datastore.forget([ref1])
1642 butler._datastore.trash(ref1)
1643 butler._datastore.emptyTrash()
1644 self.assertTrue(uri1.exists())
1645 uri1.remove() # Clean it up.
1647 # Simulate execution butler setup by deleting the datastore
1648 # record but keeping the file around and trusting.
1649 butler._datastore.trustGetRequest = True
1650 uris = butler.get_many_uris([ref2, ref3])
1651 uri2 = uris[ref2].primaryURI
1652 uri3 = uris[ref3].primaryURI
1653 self.assertTrue(uri2.exists())
1654 self.assertTrue(uri3.exists())
1656 # Remove the datastore record.
1657 butler._datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table
1658 butler._datastore.forget([ref2])
1659 self.assertTrue(uri2.exists())
1660 butler._datastore.trash([ref2, ref3])
1661 # Immediate removal of the ref2 file.
1662 self.assertFalse(uri2.exists())
1663 # But ref3 has to wait for emptyTrash.
1664 self.assertTrue(uri3.exists())
1665 butler._datastore.emptyTrash()
1666 self.assertFalse(uri3.exists())
1668 # Clear out the datasets from registry.
1669 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
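# Summary of the trash semantics demonstrated above: without trust,
# emptyTrash() only removes artifacts that the datastore still has records
# for; with trustGetRequest enabled, trashing a ref whose record is already
# gone deletes the file immediately, while refs that still have records
# wait for emptyTrash().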
1671 def testPytypeCoercion(self) -> None:
1672 """Test python type coercion on Butler.get and put."""
1673 # Store some data with the normal example storage class.
1674 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1675 datasetTypeName = "test_metric"
1676 butler = self.runPutGetTest(storageClass, datasetTypeName)
1678 dataId = {"instrument": "DummyCamComp", "visit": 423}
1679 metric = butler.get(datasetTypeName, dataId=dataId)
1680 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample")
1682 datasetType_ori = butler.registry.getDatasetType(datasetTypeName)
1683 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents")
1685 # Now need to hack the registry dataset type definition.
1686 # There is no API for this.
1687 assert isinstance(butler._registry, SqlRegistry)
1688 manager = butler._registry._managers.datasets
1689 assert hasattr(manager, "_db") and hasattr(manager, "_static")
1690 manager._db.update(
1691 manager._static.dataset_type,
1692 {"name": datasetTypeName},
1693 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"},
1694 )
1696 # Force reset of dataset type cache
1697 butler.registry.refresh()
1699 datasetType_new = butler.registry.getDatasetType(datasetTypeName)
1700 self.assertEqual(datasetType_new.name, datasetType_ori.name)
1701 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel")
1703 metric_model = butler.get(datasetTypeName, dataId=dataId)
1704 self.assertNotEqual(type(metric_model), type(metric))
1705 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel")
1707 # Put the model and read it back to show that everything now
1708 # works as normal.
1709 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424)
1710 metric_model_new = butler.get(metric_ref)
1711 self.assertEqual(metric_model_new, metric_model)
1713 # Hack the storage class again to one that cannot be converted,
1714 # so that the get will fail.
1715 manager._db.update(
1716 manager._static.dataset_type,
1717 {"name": datasetTypeName},
1718 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"},
1719 )
1720 butler.registry.refresh()
1722 with self.assertRaises(ValueError):
1723 butler.get(datasetTypeName, dataId=dataId)
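# Related public API sketch: rather than hacking the registry, a caller can
# request a conversion explicitly by overriding the storage class on a
# resolved ref, assuming the two storage classes are convertible and `ref`
# was stored as StructuredDataNoComponents:
#
#     ref_model = ref.overrideStorageClass("StructuredDataNoComponentsModel")
#     metric_model = butler.get(ref_model)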
1726@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
1727class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1728 """PosixDatastore specialization of a butler using Postgres"""
1730 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1731 fullConfigKey = ".datastore.formatters"
1732 validationCanFail = True
1733 datastoreStr = ["/tmp"]
1734 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1735 registryStr = "PostgreSQL@test"
1736 postgresql: Any
1738 @staticmethod
1739 def _handler(postgresql: Any) -> None:
1740 engine = sqlalchemy.engine.create_engine(postgresql.url())
1741 with engine.begin() as connection:
1742 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))
1744 @classmethod
1745 def setUpClass(cls) -> None:
1746 # Create the postgres test server.
1747 cls.postgresql = testing.postgresql.PostgresqlFactory(
1748 cache_initialized_db=True, on_initialized=cls._handler
1749 )
1750 super().setUpClass()
1752 @classmethod
1753 def tearDownClass(cls) -> None:
1754 # Clean up any lingering SQLAlchemy engines/connections
1755 # so they're closed before we shut down the server.
1756 gc.collect()
1757 cls.postgresql.clear_cache()
1758 super().tearDownClass()
1760 def setUp(self) -> None:
1761 self.server = self.postgresql()
1763 # Need to add a registry section to the config.
1764 self._temp_config = False
1765 config = Config(self.configFile)
1766 config["registry", "db"] = self.server.url()
1767 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh:
1768 config.dump(fh)
1769 self.configFile = fh.name
1770 self._temp_config = True
1771 super().setUp()
1773 def tearDown(self) -> None:
1774 self.server.stop()
1775 if self._temp_config and os.path.exists(self.configFile):
1776 os.remove(self.configFile)
1777 super().tearDown()
1779 def testMakeRepo(self) -> None:
1780 # The base class test assumes that it is using sqlite and that the
1781 # config file is acceptable to sqlite.
1782 raise unittest.SkipTest("Postgres config is not compatible with this test.")
1785class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
1786 """InMemoryDatastore specialization of a butler"""
1788 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
1789 fullConfigKey = None
1790 useTempRoot = False
1791 validationCanFail = False
1792 datastoreStr = ["datastore='InMemory"]
1793 datastoreName = ["InMemoryDatastore@"]
1794 registryStr = "/gen3.sqlite3"
1796 def testIngest(self) -> None:
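# Ingest is file-based, so it is not supported by the in-memory
# datastore; the inherited test is overridden with a no-op.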
1797 pass
1800class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1801 """PosixDatastore specialization"""
1803 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
1804 fullConfigKey = ".datastore.datastores.1.formatters"
1805 validationCanFail = True
1806 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
1807 datastoreName = [
1808 "InMemoryDatastore@",
1809 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
1810 "SecondDatastore",
1811 ]
1812 registryStr = "/gen3.sqlite3"
1815class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
1816 """Test that a yaml file in one location can refer to a root in another."""
1818 datastoreStr = ["dir1"]
1819 # Disable the makeRepo test since we are deliberately not using
1820 # butler.yaml as the config name.
1821 fullConfigKey = None
1823 def setUp(self) -> None:
1824 self.root = makeTestTempDir(TESTDIR)
1826 # Make a new repository in one place
1827 self.dir1 = os.path.join(self.root, "dir1")
1828 Butler.makeRepo(self.dir1, config=Config(self.configFile))
1830 # Move the yaml file to a different place and add a "root"
1831 self.dir2 = os.path.join(self.root, "dir2")
1832 os.makedirs(self.dir2, exist_ok=True)
1833 configFile1 = os.path.join(self.dir1, "butler.yaml")
1834 config = Config(configFile1)
1835 config["root"] = self.dir1
1836 configFile2 = os.path.join(self.dir2, "butler2.yaml")
1837 config.dumpToUri(configFile2)
1838 os.remove(configFile1)
1839 self.tmpConfigFile = configFile2
1841 def testFileLocations(self) -> None:
1842 self.assertNotEqual(self.dir1, self.dir2)
1843 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
1844 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
1845 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
1848class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
1849 """Test that a config file created by makeRepo outside of repo works."""
1851 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1853 def setUp(self) -> None:
1854 self.root = makeTestTempDir(TESTDIR)
1855 self.root2 = makeTestTempDir(TESTDIR)
1857 self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
1858 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1860 def tearDown(self) -> None:
1861 if os.path.exists(self.root2):
1862 shutil.rmtree(self.root2, ignore_errors=True)
1863 super().tearDown()
1865 def testConfigExistence(self) -> None:
1866 c = Config(self.tmpConfigFile)
1867 uri_config = ResourcePath(c["root"])
1868 uri_expected = ResourcePath(self.root, forceDirectory=True)
1869 self.assertEqual(uri_config.geturl(), uri_expected.geturl())
1870 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")
1872 def testPutGet(self) -> None:
1873 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1874 self.runPutGetTest(storageClass, "test_metric")
1877class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
1878 """Test that a config file created by makeRepo outside of repo works."""
1880 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1882 def setUp(self) -> None:
1883 self.root = makeTestTempDir(TESTDIR)
1884 self.root2 = makeTestTempDir(TESTDIR)
1886 self.tmpConfigFile = self.root2
1887 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1889 def testConfigExistence(self) -> None:
1890 # Append the yaml file name, else the Config constructor does not
1891 # know the file type.
1892 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
1893 super().testConfigExistence()
1896class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
1897 """Test that a config file created by makeRepo outside of repo works."""
1899 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1901 def setUp(self) -> None:
1902 self.root = makeTestTempDir(TESTDIR)
1903 self.root2 = makeTestTempDir(TESTDIR)
1905 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl()
1906 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1909@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
1910class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1911 """S3Datastore specialization of a butler; an S3 storage Datastore +
1912 a local in-memory SqlRegistry.
1913 """
1915 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
1916 fullConfigKey = None
1917 validationCanFail = True
1919 bucketName = "anybucketname"
1920 """Name of the Bucket that will be used in the tests. The name is read from
1921 the config file used with the tests during set-up.
1922 """
1924 root = "butlerRoot/"
1925 """Root repository directory expected to be used in case useTempRoot=False.
1926 Otherwise the root is set to a 20 characters long randomly generated string
1927 during set-up.
1928 """
1930 datastoreStr = [f"datastore={root}"]
1931 """Contains all expected root locations in a format expected to be
1932 returned by Butler stringification.
1933 """
1935 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
1936 """The expected format of the S3 Datastore string."""
1938 registryStr = "/gen3.sqlite3"
1939 """Expected format of the Registry string."""
1941 mock_s3 = mock_s3()
1942 """The mocked s3 interface from moto."""
1944 def genRoot(self) -> str:
1945 """Return a random string of len 20 to serve as a root
1946 name for the temporary bucket repo.
1948 This is equivalent to tempfile.mkdtemp as this is what self.root
1949 becomes when useTempRoot is True.
1950 """
1951 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20))
1952 return rndstr + "/"
1954 def setUp(self) -> None:
1955 config = Config(self.configFile)
1956 uri = ResourcePath(config[".datastore.datastore.root"])
1957 self.bucketName = uri.netloc
1959 # Enable S3 mocking of tests.
1960 self.mock_s3.start()
1962 # set up some fake credentials if they do not exist
1963 self.usingDummyCredentials = setAwsEnvCredentials()
1965 if self.useTempRoot:
1966 self.root = self.genRoot()
1967 rooturi = f"s3://{self.bucketName}/{self.root}"
1968 config.update({"datastore": {"datastore": {"root": rooturi}}})
1970 # Need a local folder to store the registry database.
1971 self.reg_dir = makeTestTempDir(TESTDIR)
1972 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
1974 # Moto needs to know that we expect the bucket self.bucketName to
1975 # exist (this used to be the class attribute bucketName).
1976 s3 = boto3.resource("s3")
1977 s3.create_bucket(Bucket=self.bucketName)
1979 self.datastoreStr = [f"datastore='{rooturi}'"]
1980 self.datastoreName = [f"FileDatastore@{rooturi}"]
1981 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
1982 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")
1984 def tearDown(self) -> None:
1985 s3 = boto3.resource("s3")
1986 bucket = s3.Bucket(self.bucketName)
1987 try:
1988 bucket.objects.all().delete()
1989 except botocore.exceptions.ClientError as e:
1990 if e.response["Error"]["Code"] == "404":
1991 # the key was not reachable - pass
1992 pass
1993 else:
1994 raise
1996 bucket = s3.Bucket(self.bucketName)
1997 bucket.delete()
1999 # Stop the S3 mock.
2000 self.mock_s3.stop()
2002 # unset any potentially set dummy credentials
2003 if self.usingDummyCredentials:
2004 unsetAwsEnvCredentials()
2006 if self.reg_dir is not None and os.path.exists(self.reg_dir):
2007 shutil.rmtree(self.reg_dir, ignore_errors=True)
2009 if self.useTempRoot and os.path.exists(self.root):
2010 shutil.rmtree(self.root, ignore_errors=True)
2012 super().tearDown()
2015class PosixDatastoreTransfers(unittest.TestCase):
2016 """Test data transfers between butlers.
2018 Different dataset ID managers are tested: UUID to UUID and integer
2019 to integer. UUID to integer is not supported since we do not
2020 currently want to allow it. Integer to UUID is supported, with the
2021 caveat that UUID4 IDs will be generated; these would be incorrect
2022 for raw dataset types, which the test ignores.
2023 """
2025 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
2026 storageClassFactory: StorageClassFactory
2028 @classmethod
2029 def setUpClass(cls) -> None:
2030 cls.storageClassFactory = StorageClassFactory()
2031 cls.storageClassFactory.addFromConfig(cls.configFile)
2033 def setUp(self) -> None:
2034 self.root = makeTestTempDir(TESTDIR)
2035 self.config = Config(self.configFile)
2037 def tearDown(self) -> None:
2038 removeTestTempDir(self.root)
2040 def create_butler(self, manager: str, label: str) -> Butler:
2041 config = Config(self.configFile)
2042 config["registry", "managers", "datasets"] = manager
2043 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True)
2045 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None:
2046 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
2047 if manager1 is None:
2048 manager1 = default
2049 if manager2 is None:
2050 manager2 = default
2051 self.source_butler = self.create_butler(manager1, "1")
2052 self.target_butler = self.create_butler(manager2, "2")
2054 def testTransferUuidToUuid(self) -> None:
2055 self.create_butlers()
2056 self.assertButlerTransfers()
2058 def _enable_trust(self, datastore: Datastore) -> None:
2059 if hasattr(datastore, "trustGetRequest"):
2060 datastore.trustGetRequest = True
2061 elif hasattr(datastore, "datastores"):
2062 for this_datastore in datastore.datastores:
2063 if hasattr(this_datastore, "trustGetRequest"):
2064 this_datastore.trustGetRequest = True
2066 def testTransferMissing(self) -> None:
2067 """Test transfers where datastore records are missing.
2069 This is how execution butler works.
2070 """
2071 self.create_butlers()
2073 # Configure the source butler to allow trust.
2074 self._enable_trust(self.source_butler._datastore)
2076 self.assertButlerTransfers(purge=True)
2078 def testTransferMissingDisassembly(self) -> None:
2079 """Test transfers where datastore records are missing.
2081 This is how execution butler works.
2082 """
2083 self.create_butlers()
2085 # Configure the source butler to allow trust.
2086 self._enable_trust(self.source_butler._datastore)
2088 # Test disassembly.
2089 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite")
2091 def testAbsoluteURITransferDirect(self) -> None:
2092 """Test transfer using an absolute URI."""
2093 self._absolute_transfer("auto")
2095 def testAbsoluteURITransferCopy(self) -> None:
2096 """Test transfer using an absolute URI."""
2097 self._absolute_transfer("copy")
2099 def _absolute_transfer(self, transfer: str) -> None:
2100 self.create_butlers()
2102 storageClassName = "StructuredData"
2103 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
2104 datasetTypeName = "random_data"
2105 run = "run1"
2106 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
2108 dimensions = self.source_butler.dimensions.extract(())
2109 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2110 self.source_butler.registry.registerDatasetType(datasetType)
2112 metrics = makeExampleMetrics()
2113 with ResourcePath.temporary_uri(suffix=".json") as temp:
2114 dataId = DataCoordinate.makeEmpty(self.source_butler.dimensions)
2115 source_refs = [DatasetRef(datasetType, dataId, run=run)]
2116 temp.write(json.dumps(metrics.exportAsDict()).encode())
2117 dataset = FileDataset(path=temp, refs=source_refs)
2118 self.source_butler.ingest(dataset, transfer="direct")
2120 self.target_butler.transfer_from(
2121 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer
2122 )
2124 uri = self.target_butler.getURI(dataset.refs[0])
2125 if transfer == "auto":
2126 self.assertEqual(uri, temp)
2127 else:
2128 self.assertNotEqual(uri, temp)
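# Note: transfer="direct" ingest records the absolute URI without copying
# the file into the datastore, so an "auto" transfer between butlers keeps
# pointing at the original location, whereas "copy" creates a new artifact
# inside the target datastore.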
2130 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None:
2131 """Test that a run can be transferred to another butler."""
2132 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
2133 datasetTypeName = "random_data"
2135 # The test will create three collections, and we will want to
2136 # transfer two of those three.
2137 runs = ["run1", "run2", "other"]
2139 # Also want to use two different dataset types to ensure that
2140 # grouping works.
2141 datasetTypeNames = ["random_data", "random_data_2"]
2143 # Create the run collections in the source butler.
2144 for run in runs:
2145 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
2147 # Create dimensions in source butler.
2148 n_exposures = 30
2149 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
2150 self.source_butler.registry.insertDimensionData(
2151 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
2152 )
2153 self.source_butler.registry.insertDimensionData(
2154 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
2155 )
2157 for i in range(n_exposures):
2158 self.source_butler.registry.insertDimensionData(
2159 "exposure",
2160 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"},
2161 )
2163 # Create dataset types in the source butler.
2164 dimensions = self.source_butler.dimensions.extract(["instrument", "exposure"])
2165 for datasetTypeName in datasetTypeNames:
2166 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2167 self.source_butler.registry.registerDatasetType(datasetType)
2169 # Write a dataset to an unrelated run -- this will ensure that
2170 # we are rewriting integer dataset IDs in the target if necessary.
2171 # Not relevant for UUIDs.
2172 run = "distraction"
2173 butler = Butler(butler=self.source_butler, run=run)
2174 butler.put(
2175 makeExampleMetrics(),
2176 datasetTypeName,
2177 exposure=1,
2178 instrument="DummyCamComp",
2179 physical_filter="d-r",
2180 )
2182 # Write some example metrics to the source
2183 butler = Butler(butler=self.source_butler)
2185 # Set of DatasetRefs that should be in the list of refs to transfer
2186 # but which will not be transferred.
2187 deleted: set[DatasetRef] = set()
2189 n_expected = 20 # Number of datasets expected to be transferred
2190 source_refs = []
2191 for i in range(n_exposures):
2192 # Put a third of the datasets into each collection; only retain
2193 # two thirds.
2194 index = i % 3
2195 run = runs[index]
2196 datasetTypeName = datasetTypeNames[i % 2]
2198 metric = MetricsExample(
2199 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)]
2200 )
2201 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"}
2202 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)
2204 # Remove the datastore record using low-level API, but only
2205 # for a specific index.
2206 if purge and index == 1:
2207 # For one of these delete the file as well.
2208 # This allows the "missing" code to filter the
2209 # file out.
2210 # Access the individual datastores.
2211 datastores = []
2212 if hasattr(butler._datastore, "datastores"):
2213 datastores.extend(butler._datastore.datastores)
2214 else:
2215 datastores.append(butler._datastore)
2217 if not deleted:
2218 # For a chained datastore we need to remove
2219 # files in each chain.
2220 for datastore in datastores:
2221 # The file might not be known to the datastore
2222 # if constraints are used.
2223 try:
2224 primary, uris = datastore.getURIs(ref)
2225 except FileNotFoundError:
2226 continue
2227 if primary and primary.scheme != "mem":
2228 primary.remove()
2229 for uri in uris.values():
2230 if uri.scheme != "mem":
2231 uri.remove()
2232 n_expected -= 1
2233 deleted.add(ref)
2235 # Remove the datastore record.
2236 for datastore in datastores:
2237 if hasattr(datastore, "removeStoredItemInfo"):
2238 datastore.removeStoredItemInfo(ref)
2240 if index < 2:
2241 source_refs.append(ref)
2242 if ref not in deleted:
2243 new_metric = butler.get(ref)
2244 self.assertEqual(new_metric, metric)
2246 # Create some bad dataset types to ensure we check for inconsistent
2247 # definitions.
2248 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList")
2249 for datasetTypeName in datasetTypeNames:
2250 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass)
2251 self.target_butler.registry.registerDatasetType(datasetType)
2252 with self.assertRaises(ConflictingDefinitionError) as cm:
2253 self.target_butler.transfer_from(self.source_butler, source_refs)
2254 self.assertIn("dataset type differs", str(cm.exception))
2256 # And remove the bad definitions.
2257 for datasetTypeName in datasetTypeNames:
2258 self.target_butler.registry.removeDatasetType(datasetTypeName)
2260 # Transfer without creating dataset types should fail.
2261 with self.assertRaises(KeyError):
2262 self.target_butler.transfer_from(self.source_butler, source_refs)
2264 # Transfer without creating dimensions should fail.
2265 with self.assertRaises(ConflictingDefinitionError) as cm:
2266 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True)
2267 self.assertIn("dimension", str(cm.exception))
2269 # The failed transfer above leaves the registry in an inconsistent
2270 # state because the run is created but then rolled back without
2271 # the collection cache being cleared. For now force a refresh.
2272 # This can be removed with DM-35498.
2273 self.target_butler.registry.refresh()
2275 # Now transfer them to the second butler, including dimensions.
2276 with self.assertLogs(level=logging.DEBUG) as log_cm:
2277 transferred = self.target_butler.transfer_from(
2278 self.source_butler,
2279 source_refs,
2280 register_dataset_types=True,
2281 transfer_dimensions=True,
2282 )
2283 self.assertEqual(len(transferred), n_expected)
2284 log_output = ";".join(log_cm.output)
2286 # A ChainedDatastore will use the in-memory datastore for mexists,
2287 # so we cannot rely on the mexists log message.
2288 self.assertIn("Number of datastore records found in source", log_output)
2289 self.assertIn("Creating output run", log_output)
2291 # Do the transfer twice to ensure that it will do nothing extra.
2292 # Only do this if purge=True because it does not work for int
2293 # dataset_id.
2294 if purge:
2295 # This should not need to register dataset types.
2296 transferred = self.target_butler.transfer_from(self.source_butler, source_refs)
2297 self.assertEqual(len(transferred), n_expected)
2299 # Also do an explicit low-level transfer to trigger some
2300 # edge cases.
2301 with self.assertLogs(level=logging.DEBUG) as log_cm:
2302 self.target_butler._datastore.transfer_from(self.source_butler._datastore, source_refs)
2303 log_output = ";".join(log_cm.output)
2304 self.assertIn("no file artifacts exist", log_output)
2306 with self.assertRaises((TypeError, AttributeError)):
2307 self.target_butler._datastore.transfer_from(self.source_butler, source_refs) # type: ignore
2309 with self.assertRaises(ValueError):
2310 self.target_butler._datastore.transfer_from(
2311 self.source_butler._datastore, source_refs, transfer="split"
2312 )
2314 # Now try to get the same refs from the new butler.
2315 for ref in source_refs:
2316 if ref not in deleted:
2317 new_metric = self.target_butler.get(ref)
2318 old_metric = self.source_butler.get(ref)
2319 self.assertEqual(new_metric, old_metric)
2321 # Now prune the run2 collection and create a CHAINED collection in
2322 # its place. This should block the transfer.
2323 self.target_butler.removeRuns(["run2"], unstore=True)
2324 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED)
2325 with self.assertRaises(CollectionTypeError):
2326 # Re-importing the run1 datasets can be problematic if they
2327 # use integer IDs so filter those out.
2328 to_transfer = [ref for ref in source_refs if ref.run == "run2"]
2329 self.target_butler.transfer_from(self.source_butler, to_transfer)
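# Minimal usage sketch of the high-level API exercised above, assuming
# `source` and `target` are writeable Butlers and `refs` are resolved
# DatasetRefs present in `source`:
#
#     transferred = target.transfer_from(
#         source,
#         refs,
#         transfer="auto",
#         register_dataset_types=True,
#         transfer_dimensions=True,
#     )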
2332class ChainedDatastoreTransfers(PosixDatastoreTransfers):
2333 """Test transfers using a chained datastore."""
2335 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
2338class NullDatastoreTestCase(unittest.TestCase):
2339 """Test that we can fall back to a null datastore."""
2341 # Need a good config to create the repo.
2342 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
2343 storageClassFactory: StorageClassFactory
2345 @classmethod
2346 def setUpClass(cls) -> None:
2347 cls.storageClassFactory = StorageClassFactory()
2348 cls.storageClassFactory.addFromConfig(cls.configFile)
2350 def setUp(self) -> None:
2351 """Create a new butler root for each test."""
2352 self.root = makeTestTempDir(TESTDIR)
2353 Butler.makeRepo(self.root, config=Config(self.configFile))
2355 def tearDown(self) -> None:
2356 removeTestTempDir(self.root)
2358 def test_fallback(self) -> None:
2359 # Read the butler config and mess with the datastore section.
2360 bad_config = Config(os.path.join(self.root, "butler.yaml"))
2361 bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore"
2363 with self.assertRaises(RuntimeError):
2364 Butler(bad_config)
2366 butler = Butler(bad_config, writeable=True, without_datastore=True)
2367 self.assertIsInstance(butler._datastore, NullDatastore)
2369 # Check that registry is working.
2370 butler.registry.registerRun("MYRUN")
2371 collections = butler.registry.queryCollections(...)
2372 self.assertIn("MYRUN", set(collections))
2374 # Create a ref.
2375 dimensions = butler.dimensions.extract([])
2376 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
2377 datasetTypeName = "metric"
2378 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2379 butler.registry.registerDatasetType(datasetType)
2380 ref = DatasetRef(datasetType, {}, run="MYRUN")
2382 # Check that datastore will complain.
2383 with self.assertRaises(FileNotFoundError):
2384 butler.get(ref)
2385 with self.assertRaises(FileNotFoundError):
2386 butler.getURI(ref)
2389def setup_module(module: types.ModuleType) -> None:
2390 """Set up the module for pytest."""
2391 clean_environment()
2394if __name__ == "__main__":
2395 clean_environment()
2396 unittest.main()