# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""
from __future__ import annotations

import gc
import json
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock
import uuid
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, cast

try:
    import boto3
    import botocore
    from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
    from moto import mock_s3  # type: ignore[import]
except ImportError:
    boto3 = None

    def mock_s3(cls: Any) -> Any:  # type: ignore[no-untyped-def]
        """No-op decorator in case moto mock_s3 can not be imported."""
        return cls
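
# Note: the no-op fallback above must hand back its target unchanged;
# returning the decorated class is what keeps @mock_s3-decorated test
# classes usable when moto is not installed.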

try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    ButlerRepoIndex,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetExistence,
    DatasetRef,
    DatasetType,
    FileDataset,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.datastore import NullDatastore
from lsst.daf.butler.datastore.file_templates import FileTemplate, FileTemplateValidationError
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.repo_relocation import BUTLER_ROOT_TAG
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    import types

    from lsst.daf.butler import Datastore, DimensionGraph, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def clean_environment() -> None:
    """Remove external environment variables that affect the tests."""
    for k in (
        "DAF_BUTLER_REPOSITORY_INDEX",
        "S3_ENDPOINT_URL",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_SHARED_CREDENTIALS_FILE",
    ):
        os.environ.pop(k, None)


def makeExampleMetrics() -> MetricsExample:
    """Return example dataset suitable for tests."""
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )
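
# A MetricsExample bundles a "summary" mapping, an "output" mapping and a
# "data" sequence (the three positional arguments above); the composite
# storage-class tests below read these back as individual components via
# assertGetComponents().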


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other test
    cases.
    """

    def testSearchPath(self) -> None:
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests(TestCaseMixin):
    """Helper class for running a suite of put/get tests from different
    butler configurations.
    """

    root: str
    default_run = "ingésτ😺"
    storageClassFactory: StorageClassFactory
    configFile: str
    tmpConfigFile: str

    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGraph, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(
        self,
        butler: Butler,
        datasetRef: DatasetRef,
        components: tuple[str, ...],
        reference: Any,
        collections: Any = None,
    ) -> None:
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[Butler, DatasetType]:
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run})

        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Butler:
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType.

        # Keep track of any collections we add and do not clean up.
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
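        # The bare annotation on the next line types the loop variable for
        # mypy: each iteration supplies either a resolved ref or a
        # (dataset type / name, data ID) pair, both of which put() accepts.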
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]
        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                kwargs: dict[str, Any] = {}
                if not isinstance(args[0], DatasetRef):  # type: ignore
                    kwargs["run"] = this_run
                ref = butler.put(metric, *args, **kwargs)
                self.assertIsInstance(ref, DatasetRef)

                # Test get with a resolved ref (the old getDirect)
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                # Can the artifacts themselves be retrieved?
                if not butler._datastore.isEphemeral:
                    root_uri = ResourcePath(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)
                            assert path_in_destination is not None

                            # When the path is not preserved there should not
                            # be any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                kwargs = {"collections": this_run}
                if isinstance(args[0], DatasetRef):
                    kwargs = {}  # Prevent warning from being issued.
                self.assertFalse(butler.exists(*args, **kwargs))
                # get() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know they are
                # empty.
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Create DatasetRef for put using default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Check that getDeferred fails with standalone ref.
        with self.assertRaises(LookupError):
            butler.getDeferred(refIn)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        assert metric.data is not None  # for mypy
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises(FileNotFoundError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed.
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add the same ref again, so we can check that duplicate put fails.
        ref = butler.put(metric, datasetType, dataId)

        # Repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with a resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # In the case of a resolved ref this write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present.

        return butler

    def testDeferredCollectionPassing(self) -> None:
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # A second time it will be allowed but indicate a no-op.
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises
        # CollectionError.
        self.assertFalse(butler.exists(datasetType, dataId))
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True
    validationCanFail: bool
    fullConfigKey: str | None
    registryStr: str | None
    datastoreName: list[str] | None
    datastoreStr: list[str]

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self) -> None:
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler._datastore, butler2._datastore)

        # Test that we can use an environment variable to find this
        # repository.
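        # The index file named by DAF_BUTLER_REPOSITORY_INDEX is a small
        # YAML or JSON mapping of alias to repo URI; the content built
        # below looks roughly like this (illustrative sketch only):
        #
        #     label: <path to this repo's butler.yaml>
        #     bad_label: file://bucket/not_real.yaml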
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"file://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"):
                        Butler("bad_label")
                    with self.assertRaises(FileNotFoundError):
                        # Should ignore aliases.
                        Butler(ResourcePath("label", forceAbsolute=False))
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(
                        Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False)
                    )
                    self.assertIn("not known to", str(cm.exception))
                    # Should report no failure.
                    self.assertEqual(ButlerRepoIndex.get_failure_reason(), "")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with empty configuration.
                butler_index = Config()
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"):
                        Butler("label")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with bad contents.
                with open(temp_file.ospath, "w") as fh:
                    print("'", file=fh)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"):
                        Butler("label")
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())

            with self.assertRaisesRegex(FileNotFoundError, "index file not found"):
                Butler("label")

        # Check that we can create a Butler when the alias file is not found.
        butler = Butler(self.tmpConfigFile, writeable=False)
        self.assertIsInstance(butler, Butler)
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler._datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler._datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # A parameter on the write storage class should work regardless
        # of the read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample.
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the wrong dataset type definition, this time
        # using the dataset type and dataId rather than a resolved ref.
        # This should be consistent with the ref-based get() behavior above
        # and return the type of the supplied DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))
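
        # Each FileDataset ties one file path to one or more refs plus the
        # formatter used to read it; the ingest() call below transfers the
        # files into the datastore and registers all of the refs in one go.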
        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            # For first ingest use copy.
            butler.ingest(*datasets, transfer="copy", record_validation_info=False)

            # Now try to ingest again in "execution butler" mode where
            # the registry entries exist but the datastore does not have
            # the files. We also need to strip the dimension records to ensure
            # that they will be re-added by the ingest.
            ref = datasets[0].refs[0]
            datasets[0].refs = [
                cast(
                    DatasetRef,
                    butler.registry.findDataset(ref.datasetType, dataId=ref.dataId, collections=ref.run),
                )
                for ref in datasets[0].refs
            ]
            all_refs = []
            for dataset in datasets:
                refs = []
                for ref in dataset.refs:
                    # Create a dict from the dataId to drop the records.
                    new_data_id = {str(k): v for k, v in ref.dataId.items()}
                    new_ref = butler.registry.findDataset(ref.datasetType, new_data_id, collections=ref.run)
                    assert new_ref is not None
                    self.assertFalse(new_ref.dataId.hasRecords())
                    refs.append(new_ref)
                dataset.refs = refs
                all_refs.extend(dataset.refs)
            butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False)

            # Use move mode to test that the file is deleted. Also
            # disable recording of file size.
            butler.ingest(*datasets, transfer="move", record_validation_info=False)

            # Check that every ref now has records.
            for dataset in datasets:
                for ref in dataset.refs:
                    self.assertTrue(ref.dataId.hasRecords())

            # Ensure that the file has disappeared.
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler._datastore.getStoredItemsInfo(datasets[0].refs[0])  # type: ignore[attr-defined]
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.exists(datasetTypeName, dataId1))
        self.assertTrue(butler.exists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets.
        datasets = []
        butler.ingest(*datasets)

    def testPickle(self) -> None:
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry, entries are not
        # created for components, but querying can still return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get with a resolved ref (the old getDirect)
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)

    def testMakeRepo(self) -> None:
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo.
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler._datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self) -> None:
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
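            # Note that the dataId below carries no "exposure" key: the
            # butler is expected to expand it against the exposure dimension
            # records, matching on seq_num and day_obs and rewriting it to
            # the canonical exposure ID (asserted just after the put).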
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId.
            self.assertEqual(ref.dataId["exposure"], i)

            # And check that we can get the dataset back with the same dataId.
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool:
        """Check if a file exists at a given path (relative to root).

        The testPutTemplates test verifies the actual physical existence of
        the files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
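        # The ":?" suffix marks visit.namex as an optional template field:
        # since the visit record has no "namex" attribute, the field should
        # be dropped with a log message rather than raising, so formatting
        # is still expected to succeed (asserted below).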
        with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames.
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass: StorageClass) -> None:
        """Test exporting and importing.

        This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")

        # Test that we must have a file extension.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump", directory=".") as export:
                pass

        # Test that an unknown format is not allowed.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump.fits", directory=".") as export:
                pass

        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler.
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public API.
                with open(exportFile) as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.exists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.dimensions["skymap"].RecordClass(**skymapRecord)],
                )
1394 def testRemoveRuns(self) -> None:
1395 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1396 butler = Butler(self.tmpConfigFile, writeable=True)
1397 # Load registry data with dimensions to hang datasets off of.
1398 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
1399 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
1400 # Add some RUN-type collection.
1401 run1 = "run1"
1402 butler.registry.registerRun(run1)
1403 run2 = "run2"
1404 butler.registry.registerRun(run2)
1405 # put a dataset in each
1406 metric = makeExampleMetrics()
1407 dimensions = butler.dimensions.extract(["instrument", "physical_filter"])
1408 datasetType = self.addDatasetType(
1409 "prune_collections_test_dataset", dimensions, storageClass, butler.registry
1410 )
1411 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
1412 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
1413 uri1 = butler.getURI(ref1)
1414 uri2 = butler.getURI(ref2)
1416 with self.assertRaises(OrphanedRecordError):
1417 butler.registry.removeDatasetType(datasetType.name)
1419 # Remove from both runs with different values for unstore.
1420 butler.removeRuns([run1], unstore=True)
1421 butler.removeRuns([run2], unstore=False)
1422 # Should be nothing in registry for either one, and datastore should
1423 # not think either exists.
1424 with self.assertRaises(MissingCollectionError):
1425 butler.registry.getCollectionType(run1)
1426 with self.assertRaises(MissingCollectionError):
1427 butler.registry.getCollectionType(run2)
1428 self.assertFalse(butler.stored(ref1))
1429 self.assertFalse(butler.stored(ref2))
1430 # The ref we unstored should be gone according to the URI, but the
1431 # one we forgot should still be around.
1432 self.assertFalse(uri1.exists())
1433 self.assertTrue(uri2.exists())
1435 # Now that the collections have been pruned, we can remove the
1436 # dataset type.
1437 butler.registry.removeDatasetType(datasetType.name)
1439 with self.assertLogs("lsst.daf.butler.registries", "INFO") as cm:
1440 butler.registry.removeDatasetType(("test*", "test*"))
1441 self.assertIn("not defined", "\n".join(cm.output))
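# A minimal sketch of the removeRuns semantics exercised above, assuming a
# writeable butler and refs that live in the runs being removed; the helper
# name is illustrative. unstore=True deletes the file artifacts from the
# datastore, while unstore=False merely forgets the datastore records and
# leaves any artifacts on disk.
def _sketch_remove_runs(butler: Butler, ref_unstored: DatasetRef, ref_forgotten: DatasetRef) -> None:
    uri_unstored = butler.getURI(ref_unstored)
    uri_forgotten = butler.getURI(ref_forgotten)
    butler.removeRuns([ref_unstored.run], unstore=True)
    butler.removeRuns([ref_forgotten.run], unstore=False)
    assert not uri_unstored.exists()  # Artifact deleted along with the run.
    assert uri_forgotten.exists()  # Artifact orphaned but still present.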
1444class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1445 """PosixDatastore specialization of a butler"""
1447 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1448 fullConfigKey: str | None = ".datastore.formatters"
1449 validationCanFail = True
1450 datastoreStr = ["/tmp"]
1451 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1452 registryStr = "/gen3.sqlite3"
1454 def testPathConstructor(self) -> None:
1455 """Independent test of constructor using PathLike."""
1456 butler = Butler(self.tmpConfigFile, run=self.default_run)
1457 self.assertIsInstance(butler, Butler)
1459 # And again with a Path object with the butler yaml
1460 path = pathlib.Path(self.tmpConfigFile)
1461 butler = Butler(path, writeable=False)
1462 self.assertIsInstance(butler, Butler)
1464 # And again with a Path object without the butler yaml
1465 # (making sure we skip it if the tmp config doesn't end
1466 # in butler.yaml -- which is the case for a subclass)
1467 if self.tmpConfigFile.endswith("butler.yaml"):
1468 path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
1469 butler = Butler(path, writeable=False)
1470 self.assertIsInstance(butler, Butler)
1472 def testExportTransferCopy(self) -> None:
1473 """Test local export using all transfer modes"""
1474 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1475 exportButler = self.runPutGetTest(storageClass, "test_metric")
1476 # Test that the repo actually has at least one dataset.
1477 datasets = list(exportButler.registry.queryDatasets(..., collections=...))
1478 self.assertGreater(len(datasets), 0)
1479 uris = [exportButler.getURI(d) for d in datasets]
1480 assert isinstance(exportButler._datastore, FileDatastore)
1481 datastoreRoot = exportButler.get_datastore_roots()[exportButler.get_datastore_names()[0]]
1483 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]
1485 for path in pathsInStore:
1486 # Assume local file system
1487 assert path is not None
1488 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}")
1490 for transfer in ("copy", "link", "symlink", "relsymlink"):
1491 with safeTestTempDir(TESTDIR) as exportDir:
1492 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export:
1493 export.saveDatasets(datasets)
1494 for path in pathsInStore:
1495 assert path is not None
1496 self.assertTrue(
1497 self.checkFileExists(exportDir, path),
1498 f"Check that mode {transfer} exported files",
1499 )
1501 def testPruneDatasets(self) -> None:
1502 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1503 butler = Butler(self.tmpConfigFile, writeable=True)
1504 assert isinstance(butler._datastore, FileDatastore)
1505 # Load registry data with dimensions to hang datasets off of.
1506 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry"))
1507 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
1508 # Add some RUN-type collections.
1509 run1 = "run1"
1510 butler.registry.registerRun(run1)
1511 run2 = "run2"
1512 butler.registry.registerRun(run2)
1513 # Put some datasets. ref1 and ref2 have the same data ID and are in
1514 # different runs. ref3 has a different data ID.
1515 metric = makeExampleMetrics()
1516 dimensions = butler.dimensions.extract(["instrument", "physical_filter"])
1517 datasetType = self.addDatasetType(
1518 "prune_collections_test_dataset", dimensions, storageClass, butler.registry
1519 )
1520 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
1521 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
1522 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
1524 many_stored = butler.stored_many([ref1, ref2, ref3])
1525 for ref, stored in many_stored.items():
1526 self.assertTrue(stored, f"Ref {ref} should be stored")
1528 many_exists = butler._exists_many([ref1, ref2, ref3])
1529 for ref, exists in many_exists.items():
1530 self.assertTrue(exists, f"Checking ref {ref} exists.")
1531 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored")
1533 # Simple prune.
1534 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
1535 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1))
1537 many_stored = butler.stored_many([ref1, ref2, ref3])
1538 for ref, stored in many_stored.items():
1539 self.assertFalse(stored, f"Ref {ref} should not be stored")
1541 many_exists = butler._exists_many([ref1, ref2, ref3])
1542 for ref, exists in many_exists.items():
1543 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored")
1545 # Put data back.
1546 ref1_new = butler.put(metric, ref1)
1547 self.assertEqual(ref1_new, ref1) # Reuses original ID.
1548 ref2 = butler.put(metric, ref2)
1550 many_stored = butler.stored_many([ref1, ref2, ref3])
1551 self.assertTrue(many_stored[ref1])
1552 self.assertTrue(many_stored[ref2])
1553 self.assertFalse(many_stored[ref3])
1555 ref3 = butler.put(metric, ref3)
1557 many_exists = butler._exists_many([ref1, ref2, ref3])
1558 for ref, exists in many_exists.items():
1559 self.assertTrue(exists, f"Ref {ref} should be stored")
1561 # Clear out the datasets from registry and start again.
1562 refs = [ref1, ref2, ref3]
1563 butler.pruneDatasets(refs, purge=True, unstore=True)
1564 for ref in refs:
1565 butler.put(metric, ref)
1567 # Confirm we can retrieve deferred.
1568 dref1 = butler.getDeferred(ref1) # known and exists
1569 metric1 = dref1.get()
1570 self.assertEqual(metric1, metric)
1572 # Test different forms of file availability.
1573 # Need to be in a state where:
1574 # - one ref has only a registry record.
1575 # - one ref has a missing file but a datastore record.
1576 # - one ref has a missing datastore record but the file is there.
1577 # - one ref does not exist anywhere.
1578 # There is no need to test a ref that has everything since that is
1579 # tested above.
1580 ref0 = DatasetRef(
1581 datasetType,
1582 DataCoordinate.standardize(
1583 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions
1584 ),
1585 run=run1,
1586 )
1588 # Delete from datastore and retain in Registry.
1589 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False)
1591 # File has been removed.
1592 uri2 = butler.getURI(ref2)
1593 uri2.remove()
1595 # Datastore has lost track.
1596 butler._datastore.forget([ref3])
1598 # First test with a standard butler.
1599 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
1600 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1601 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED)
1602 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
1603 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED)
1605 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False)
1606 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1607 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED)
1608 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN)
1609 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED)
1610 self.assertTrue(exists_many[ref2])
1612 # Check that per-ref query gives the same answer as many query.
1613 for ref, exists in exists_many.items():
1614 self.assertEqual(butler.exists(ref, full_check=False), exists)
1616 # Get deferred checks for existence before it allows it to be
1617 # retrieved.
1618 with self.assertRaises(LookupError):
1619 butler.getDeferred(ref3) # not known, file exists
1620 dref2 = butler.getDeferred(ref2) # known but file missing
1621 with self.assertRaises(FileNotFoundError):
1622 dref2.get()
1624 # Test again with a trusting butler.
1625 butler._datastore.trustGetRequest = True
1626 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
1627 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1628 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED)
1629 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
1630 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT)
1632 # When trusting we can get a deferred dataset handle that is not
1633 # known but does exist.
1634 dref3 = butler.getDeferred(ref3)
1635 metric3 = dref3.get()
1636 self.assertEqual(metric3, metric)
1638 # Check that per-ref query gives the same answer as many query.
1639 for ref, exists in exists_many.items():
1640 self.assertEqual(butler.exists(ref, full_check=True), exists)
1642 # Create a ref that unexpectedly has the UUID of an existing ref
1643 # but is otherwise not the same.
1644 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id)
1645 with self.assertRaises(ValueError):
1646 butler.exists(ref_bad)
1648 # Create a ref that has a compatible storage class.
1649 ref_compat = ref2.overrideStorageClass("StructuredDataDict")
1650 exists = butler.exists(ref_compat)
1651 self.assertEqual(exists, exists_many[ref2])
1653 # Remove everything and start from scratch.
1654 butler._datastore.trustGetRequest = False
1655 butler.pruneDatasets(refs, purge=True, unstore=True)
1656 for ref in refs:
1657 butler.put(metric, ref)
1659 # These tests mess directly with the trash table and can leave the
1660 # datastore in an odd state. Do them at the end.
1661 # Check that in normal mode, deleting the record will lead to
1662 # trash not touching the file.
1663 uri1 = butler.getURI(ref1)
1664 butler._datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table
1665 butler._datastore.forget([ref1])
1666 butler._datastore.trash(ref1)
1667 butler._datastore.emptyTrash()
1668 self.assertTrue(uri1.exists())
1669 uri1.remove() # Clean it up.
1671 # Simulate execution butler setup by deleting the datastore
1672 # record but keeping the file around and trusting.
1673 butler._datastore.trustGetRequest = True
1674 uris = butler.get_many_uris([ref2, ref3])
1675 uri2 = uris[ref2].primaryURI
1676 uri3 = uris[ref3].primaryURI
1677 self.assertTrue(uri2.exists())
1678 self.assertTrue(uri3.exists())
1680 # Remove the datastore record.
1681 butler._datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table
1682 butler._datastore.forget([ref2])
1683 self.assertTrue(uri2.exists())
1684 butler._datastore.trash([ref2, ref3])
1685 # Immediate removal of the ref2 file.
1686 self.assertFalse(uri2.exists())
1687 # But ref3 has to wait for the trash to be emptied.
1688 self.assertTrue(uri3.exists())
1689 butler._datastore.emptyTrash()
1690 self.assertFalse(uri3.exists())
1692 # Clear out the datasets from registry.
1693 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
1695 def testPytypeCoercion(self) -> None:
1696 """Test python type coercion on Butler.get and put."""
1697 # Store some data with the normal example storage class.
1698 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1699 datasetTypeName = "test_metric"
1700 butler = self.runPutGetTest(storageClass, datasetTypeName)
1702 dataId = {"instrument": "DummyCamComp", "visit": 423}
1703 metric = butler.get(datasetTypeName, dataId=dataId)
1704 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample")
1706 datasetType_ori = butler.registry.getDatasetType(datasetTypeName)
1707 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents")
1709 # Now need to hack the registry dataset type definition.
1710 # There is no API for this.
1711 assert isinstance(butler._registry, SqlRegistry)
1712 manager = butler._registry._managers.datasets
1713 assert hasattr(manager, "_db") and hasattr(manager, "_static")
1714 manager._db.update(
1715 manager._static.dataset_type,
1716 {"name": datasetTypeName},
1717 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"},
1718 )
1720 # Force reset of dataset type cache
1721 butler.registry.refresh()
1723 datasetType_new = butler.registry.getDatasetType(datasetTypeName)
1724 self.assertEqual(datasetType_new.name, datasetType_ori.name)
1725 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel")
1727 metric_model = butler.get(datasetTypeName, dataId=dataId)
1728 self.assertNotEqual(type(metric_model), type(metric))
1729 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel")
1731 # Put the model and read it back to show that everything now
1732 # works as normal.
1733 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424)
1734 metric_model_new = butler.get(metric_ref)
1735 self.assertEqual(metric_model_new, metric_model)
1737 # Hack the storage class again to something that will fail on the
1738 # get with no conversion class.
1739 manager._db.update(
1740 manager._static.dataset_type,
1741 {"name": datasetTypeName},
1742 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"},
1743 )
1744 butler.registry.refresh()
1746 with self.assertRaises(ValueError):
1747 butler.get(datasetTypeName, dataId=dataId)
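# A short sketch of how the DatasetExistence flags seen in testPruneDatasets
# combine; the helper name is illustrative. DatasetExistence is a flag enum:
# RECORDED means the registry knows the dataset, DATASTORE means a datastore
# record exists, and VERIFIED additionally confirms the artifact itself when
# full_check=True is used.
def _sketch_existence_flags(butler: Butler, ref: DatasetRef) -> None:
    existence = butler.exists(ref, full_check=True)
    if existence == DatasetExistence.VERIFIED:
        print("Registry record, datastore record, and artifact all present.")
    elif existence & DatasetExistence.RECORDED:
        has_record = bool(existence & DatasetExistence.DATASTORE)
        print(f"Known to registry; datastore record present: {has_record}")
    else:
        print("Dataset is unrecognized.")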
1750@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
1751class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1752 """PosixDatastore specialization of a butler using Postgres"""
1754 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1755 fullConfigKey = ".datastore.formatters"
1756 validationCanFail = True
1757 datastoreStr = ["/tmp"]
1758 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1759 registryStr = "PostgreSQL@test"
1760 postgresql: Any
1762 @staticmethod
1763 def _handler(postgresql: Any) -> None:
1764 engine = sqlalchemy.engine.create_engine(postgresql.url())
1765 with engine.begin() as connection:
1766 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))
1768 @classmethod
1769 def setUpClass(cls) -> None:
1770 # Create the postgres test server.
1771 cls.postgresql = testing.postgresql.PostgresqlFactory(
1772 cache_initialized_db=True, on_initialized=cls._handler
1773 )
1774 super().setUpClass()
1776 @classmethod
1777 def tearDownClass(cls) -> None:
1778 # Clean up any lingering SQLAlchemy engines/connections
1779 # so they're closed before we shut down the server.
1780 gc.collect()
1781 cls.postgresql.clear_cache()
1782 super().tearDownClass()
1784 def setUp(self) -> None:
1785 self.server = self.postgresql()
1787 # Need to add a registry section to the config.
1788 self._temp_config = False
1789 config = Config(self.configFile)
1790 config["registry", "db"] = self.server.url()
1791 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh:
1792 config.dump(fh)
1793 self.configFile = fh.name
1794 self._temp_config = True
1795 super().setUp()
1797 def tearDown(self) -> None:
1798 self.server.stop()
1799 if self._temp_config and os.path.exists(self.configFile):
1800 os.remove(self.configFile)
1801 super().tearDown()
1803 def testMakeRepo(self) -> None:
1804 # The base class test assumes that it's using SQLite and that
1805 # the config file is acceptable to SQLite.
1806 raise unittest.SkipTest("Postgres config is not compatible with this test.")
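# A minimal sketch of the Postgres wiring used by this test case: the
# registry "db" entry of an otherwise standard butler config is pointed at
# the server before the repo is created. The URL below is a placeholder,
# not a real server.
def _sketch_postgres_repo(base_config_file: str, root: str) -> Butler:
    config = Config(base_config_file)
    config["registry", "db"] = "postgresql://user@localhost:5432/butler_test"
    return Butler(Butler.makeRepo(root, config=config), writeable=True)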
1809class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
1810 """InMemoryDatastore specialization of a butler"""
1812 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
1813 fullConfigKey = None
1814 useTempRoot = False
1815 validationCanFail = False
1816 datastoreStr = ["datastore='InMemory"]
1817 datastoreName = ["InMemoryDatastore@"]
1818 registryStr = "/gen3.sqlite3"
1820 def testIngest(self) -> None:
1821 pass
1824class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1825 """PosixDatastore specialization"""
1827 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
1828 fullConfigKey = ".datastore.datastores.1.formatters"
1829 validationCanFail = True
1830 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
1831 datastoreName = [
1832 "InMemoryDatastore@",
1833 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
1834 "SecondDatastore",
1835 ]
1836 registryStr = "/gen3.sqlite3"
1839class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
1840 """Test that a yaml file in one location can refer to a root in another."""
1842 datastoreStr = ["dir1"]
1843 # Disable the makeRepo test since we are deliberately not using
1844 # butler.yaml as the config name.
1845 fullConfigKey = None
1847 def setUp(self) -> None:
1848 self.root = makeTestTempDir(TESTDIR)
1850 # Make a new repository in one place
1851 self.dir1 = os.path.join(self.root, "dir1")
1852 Butler.makeRepo(self.dir1, config=Config(self.configFile))
1854 # Move the yaml file to a different place and add a "root"
1855 self.dir2 = os.path.join(self.root, "dir2")
1856 os.makedirs(self.dir2, exist_ok=True)
1857 configFile1 = os.path.join(self.dir1, "butler.yaml")
1858 config = Config(configFile1)
1859 config["root"] = self.dir1
1860 configFile2 = os.path.join(self.dir2, "butler2.yaml")
1861 config.dumpToUri(configFile2)
1862 os.remove(configFile1)
1863 self.tmpConfigFile = configFile2
1865 def testFileLocations(self) -> None:
1866 self.assertNotEqual(self.dir1, self.dir2)
1867 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
1868 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
1869 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
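# A compact sketch of the root-redirection pattern verified above: a butler
# config can live away from the repo as long as its "root" entry points back
# at the data directory. The helper name and paths are illustrative.
def _sketch_relocated_config(repo_dir: str, config_dir: str) -> Butler:
    config = Config(os.path.join(repo_dir, "butler.yaml"))
    config["root"] = repo_dir
    relocated = os.path.join(config_dir, "butler2.yaml")
    config.dumpToUri(relocated)
    return Butler(relocated, writeable=False)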
1872class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
1873 """Test that a config file created by makeRepo outside of repo works."""
1875 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1877 def setUp(self) -> None:
1878 self.root = makeTestTempDir(TESTDIR)
1879 self.root2 = makeTestTempDir(TESTDIR)
1881 self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
1882 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1884 def tearDown(self) -> None:
1885 if os.path.exists(self.root2):
1886 shutil.rmtree(self.root2, ignore_errors=True)
1887 super().tearDown()
1889 def testConfigExistence(self) -> None:
1890 c = Config(self.tmpConfigFile)
1891 uri_config = ResourcePath(c["root"])
1892 uri_expected = ResourcePath(self.root, forceDirectory=True)
1893 self.assertEqual(uri_config.geturl(), uri_expected.geturl())
1894 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")
1896 def testPutGet(self) -> None:
1897 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1898 self.runPutGetTest(storageClass, "test_metric")
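# A small sketch of the outfile behaviour exercised by this test case,
# assuming the destination directory already exists: makeRepo creates the
# repo under root but writes the resulting config elsewhere, and that
# config can be handed straight to the Butler constructor.
def _sketch_makerepo_outfile(root: str, outfile: str, base_config_file: str) -> Butler:
    Butler.makeRepo(root, config=Config(base_config_file), outfile=outfile)
    return Butler(outfile, writeable=False)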
1901class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
1902 """Test that a config file created by makeRepo outside of repo works."""
1904 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1906 def setUp(self) -> None:
1907 self.root = makeTestTempDir(TESTDIR)
1908 self.root2 = makeTestTempDir(TESTDIR)
1910 self.tmpConfigFile = self.root2
1911 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1913 def testConfigExistence(self) -> None:
1914 # Append the yaml file else Config constructor does not know the file
1915 # type.
1916 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
1917 super().testConfigExistence()
1920class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
1921 """Test that a config file created by makeRepo outside of repo works."""
1923 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1925 def setUp(self) -> None:
1926 self.root = makeTestTempDir(TESTDIR)
1927 self.root2 = makeTestTempDir(TESTDIR)
1929 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl()
1930 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1933@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
1934class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1935 """S3Datastore specialization of a butler; an S3 storage Datastore +
1936 a local in-memory SqlRegistry.
1937 """
1939 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
1940 fullConfigKey = None
1941 validationCanFail = True
1943 bucketName = "anybucketname"
1944 """Name of the Bucket that will be used in the tests. The name is read from
1945 the config file used with the tests during set-up.
1946 """
1948 root = "butlerRoot/"
1949 """Root repository directory expected to be used in case useTempRoot=False.
1950 Otherwise the root is set to a 20 characters long randomly generated string
1951 during set-up.
1952 """
1954 datastoreStr = [f"datastore={root}"]
1955 """Contains all expected root locations in a format expected to be
1956 returned by Butler stringification.
1957 """
1959 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
1960 """The expected format of the S3 Datastore string."""
1962 registryStr = "/gen3.sqlite3"
1963 """Expected format of the Registry string."""
1965 mock_s3 = mock_s3()
1966 """The mocked s3 interface from moto."""
1968 def genRoot(self) -> str:
1969 """Return a random string of len 20 to serve as a root
1970 name for the temporary bucket repo.
1972 This is equivalent to tempfile.mkdtemp as this is what self.root
1973 becomes when useTempRoot is True.
1974 """
1975 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20))
1976 return rndstr + "/"
1978 def setUp(self) -> None:
1979 config = Config(self.configFile)
1980 uri = ResourcePath(config[".datastore.datastore.root"])
1981 self.bucketName = uri.netloc
1983 # Enable S3 mocking of tests.
1984 self.mock_s3.start()
1986 # Set up some fake credentials if they do not exist.
1987 self.usingDummyCredentials = setAwsEnvCredentials()
1989 if self.useTempRoot:
1990 self.root = self.genRoot()
1991 rooturi = f"s3://{self.bucketName}/{self.root}"
1992 config.update({"datastore": {"datastore": {"root": rooturi}}})
1994 # Need a local folder to store the registry database.
1995 self.reg_dir = makeTestTempDir(TESTDIR)
1996 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
1998 # Moto needs to know that we expect the bucket to exist
1999 # (this used to be the class attribute bucketName).
2000 s3 = boto3.resource("s3")
2001 s3.create_bucket(Bucket=self.bucketName)
2003 self.datastoreStr = [f"datastore='{rooturi}'"]
2004 self.datastoreName = [f"FileDatastore@{rooturi}"]
2005 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
2006 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")
2008 def tearDown(self) -> None:
2009 s3 = boto3.resource("s3")
2010 bucket = s3.Bucket(self.bucketName)
2011 try:
2012 bucket.objects.all().delete()
2013 except botocore.exceptions.ClientError as e:
2014 if e.response["Error"]["Code"] == "404":
2015 # The key was not reachable - nothing to clean up.
2016 pass
2017 else:
2018 raise
2020 bucket = s3.Bucket(self.bucketName)
2021 bucket.delete()
2023 # Stop the S3 mock.
2024 self.mock_s3.stop()
2026 # Unset any dummy credentials that may have been set.
2027 if self.usingDummyCredentials:
2028 unsetAwsEnvCredentials()
2030 if self.reg_dir is not None and os.path.exists(self.reg_dir):
2031 shutil.rmtree(self.reg_dir, ignore_errors=True)
2033 if self.useTempRoot and os.path.exists(self.root):
2034 shutil.rmtree(self.root, ignore_errors=True)
2036 super().tearDown()
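# A condensed sketch of the moto-backed S3 setup above; the bucket name and
# root are placeholders. The mock must be started before any boto3 resource
# is created, and dummy credentials keep botocore happy when no real AWS
# profile is configured (call unsetAwsEnvCredentials and mock.stop() in
# teardown).
def _sketch_s3_repo(config_file: str) -> Config:
    mock = mock_s3()
    mock.start()  # All boto3 calls from here on hit the mock.
    setAwsEnvCredentials()  # Fake credentials if none are already set.
    boto3.resource("s3").create_bucket(Bucket="example-bucket")
    config = Config(config_file)
    config.update({"datastore": {"datastore": {"root": "s3://example-bucket/repo/"}}})
    return Butler.makeRepo("s3://example-bucket/repo/", config=config, forceConfigRoot=False)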
2039class PosixDatastoreTransfers(unittest.TestCase):
2040 """Test data transfers between butlers.
2042 Tests run for different dataset ID managers: UUID to UUID and integer
2043 to integer. UUID to integer is not supported since we do not currently
2044 want to allow that. Integer to UUID is supported, with the caveat
2045 that a UUID4 will be generated, which is incorrect for raw
2046 dataset types; the test ignores that.
2047 """
2049 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
2050 storageClassFactory: StorageClassFactory
2052 @classmethod
2053 def setUpClass(cls) -> None:
2054 cls.storageClassFactory = StorageClassFactory()
2055 cls.storageClassFactory.addFromConfig(cls.configFile)
2057 def setUp(self) -> None:
2058 self.root = makeTestTempDir(TESTDIR)
2059 self.config = Config(self.configFile)
2061 def tearDown(self) -> None:
2062 removeTestTempDir(self.root)
2064 def create_butler(self, manager: str, label: str) -> Butler:
2065 config = Config(self.configFile)
2066 config["registry", "managers", "datasets"] = manager
2067 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True)
2069 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None:
2070 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
2071 if manager1 is None:
2072 manager1 = default
2073 if manager2 is None:
2074 manager2 = default
2075 self.source_butler = self.create_butler(manager1, "1")
2076 self.target_butler = self.create_butler(manager2, "2")
2078 def testTransferUuidToUuid(self) -> None:
2079 self.create_butlers()
2080 self.assertButlerTransfers()
2082 def _enable_trust(self, datastore: Datastore) -> None:
2083 datastores = getattr(datastore, "datastores", [datastore])
2084 for this_datastore in datastores:
2085 if hasattr(this_datastore, "trustGetRequest"):
2086 this_datastore.trustGetRequest = True
2088 def testTransferMissing(self) -> None:
2089 """Test transfers where datastore records are missing.
2091 This is how execution butler works.
2092 """
2093 self.create_butlers()
2095 # Configure the source butler to allow trust.
2096 self._enable_trust(self.source_butler._datastore)
2098 self.assertButlerTransfers(purge=True)
2100 def testTransferMissingDisassembly(self) -> None:
2101 """Test transfers where datastore records are missing.
2103 This is how execution butler works.
2104 """
2105 self.create_butlers()
2107 # Configure the source butler to allow trust.
2108 self._enable_trust(self.source_butler._datastore)
2110 # Test disassembly.
2111 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite")
2113 def testAbsoluteURITransferDirect(self) -> None:
2114 """Test transfer using an absolute URI."""
2115 self._absolute_transfer("auto")
2117 def testAbsoluteURITransferCopy(self) -> None:
2118 """Test transfer using an absolute URI."""
2119 self._absolute_transfer("copy")
2121 def _absolute_transfer(self, transfer: str) -> None:
2122 self.create_butlers()
2124 storageClassName = "StructuredData"
2125 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
2126 datasetTypeName = "random_data"
2127 run = "run1"
2128 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
2130 dimensions = self.source_butler.dimensions.extract(())
2131 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2132 self.source_butler.registry.registerDatasetType(datasetType)
2134 metrics = makeExampleMetrics()
2135 with ResourcePath.temporary_uri(suffix=".json") as temp:
2136 dataId = DataCoordinate.makeEmpty(self.source_butler.dimensions)
2137 source_refs = [DatasetRef(datasetType, dataId, run=run)]
2138 temp.write(json.dumps(metrics.exportAsDict()).encode())
2139 dataset = FileDataset(path=temp, refs=source_refs)
2140 self.source_butler.ingest(dataset, transfer="direct")
2142 self.target_butler.transfer_from(
2143 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer
2144 )
2146 uri = self.target_butler.getURI(dataset.refs[0])
2147 if transfer == "auto":
2148 self.assertEqual(uri, temp)
2149 else:
2150 self.assertNotEqual(uri, temp)
2152 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None:
2153 """Test that a run can be transferred to another butler."""
2154 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
2155 datasetTypeName = "random_data"
2157 # Test will create 3 collections and we will want to transfer
2158 # two of those three.
2159 runs = ["run1", "run2", "other"]
2161 # Also want to use two different dataset types to ensure that
2162 # grouping works.
2163 datasetTypeNames = ["random_data", "random_data_2"]
2165 # Create the run collections in the source butler.
2166 for run in runs:
2167 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
2169 # Create dimensions in source butler.
2170 n_exposures = 30
2171 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
2172 self.source_butler.registry.insertDimensionData(
2173 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
2174 )
2175 self.source_butler.registry.insertDimensionData(
2176 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
2177 )
2179 for i in range(n_exposures):
2180 self.source_butler.registry.insertDimensionData(
2181 "exposure",
2182 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"},
2183 )
2185 # Create dataset types in the source butler.
2186 dimensions = self.source_butler.dimensions.extract(["instrument", "exposure"])
2187 for datasetTypeName in datasetTypeNames:
2188 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2189 self.source_butler.registry.registerDatasetType(datasetType)
2191 # Write a dataset to an unrelated run -- this will ensure that
2192 # we are rewriting integer dataset ids in the target if necessary.
2193 # Will not be relevant for UUID.
2194 run = "distraction"
2195 butler = Butler(butler=self.source_butler, run=run)
2196 butler.put(
2197 makeExampleMetrics(),
2198 datasetTypeName,
2199 exposure=1,
2200 instrument="DummyCamComp",
2201 physical_filter="d-r",
2202 )
2204 # Write some example metrics to the source
2205 butler = Butler(butler=self.source_butler)
2207 # Set of DatasetRefs that should be in the list of refs to transfer
2208 # but which will not be transferred.
2209 deleted: set[DatasetRef] = set()
2211 n_expected = 20 # Number of datasets expected to be transferred
2212 source_refs = []
2213 for i in range(n_exposures):
2214 # Put a third of the datasets into each collection; only retain
2215 # two thirds.
2216 index = i % 3
2217 run = runs[index]
2218 datasetTypeName = datasetTypeNames[i % 2]
2220 metric = MetricsExample(
2221 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)]
2222 )
2223 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"}
2224 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)
2226 # Remove the datastore record using low-level API, but only
2227 # for a specific index.
2228 if purge and index == 1:
2229 # For one of these delete the file as well.
2230 # This allows the "missing" code to filter the
2231 # file out.
2232 # Access the individual datastores.
2233 datastores = []
2234 if hasattr(butler._datastore, "datastores"):
2235 datastores.extend(butler._datastore.datastores)
2236 else:
2237 datastores.append(butler._datastore)
2239 if not deleted:
2240 # For a chained datastore we need to remove
2241 # files in each chain.
2242 for datastore in datastores:
2243 # The file might not be known to the datastore
2244 # if constraints are used.
2245 try:
2246 primary, uris = datastore.getURIs(ref)
2247 except FileNotFoundError:
2248 continue
2249 if primary and primary.scheme != "mem":
2250 primary.remove()
2251 for uri in uris.values():
2252 if uri.scheme != "mem":
2253 uri.remove()
2254 n_expected -= 1
2255 deleted.add(ref)
2257 # Remove the datastore record.
2258 for datastore in datastores:
2259 if hasattr(datastore, "removeStoredItemInfo"):
2260 datastore.removeStoredItemInfo(ref)
2262 if index < 2:
2263 source_refs.append(ref)
2264 if ref not in deleted:
2265 new_metric = butler.get(ref)
2266 self.assertEqual(new_metric, metric)
2268 # Create some bad dataset types to ensure we check for inconsistent
2269 # definitions.
2270 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList")
2271 for datasetTypeName in datasetTypeNames:
2272 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass)
2273 self.target_butler.registry.registerDatasetType(datasetType)
2274 with self.assertRaises(ConflictingDefinitionError) as cm:
2275 self.target_butler.transfer_from(self.source_butler, source_refs)
2276 self.assertIn("dataset type differs", str(cm.exception))
2278 # And remove the bad definitions.
2279 for datasetTypeName in datasetTypeNames:
2280 self.target_butler.registry.removeDatasetType(datasetTypeName)
2282 # Transfer without creating dataset types should fail.
2283 with self.assertRaises(KeyError):
2284 self.target_butler.transfer_from(self.source_butler, source_refs)
2286 # Transfer without creating dimensions should fail.
2287 with self.assertRaises(ConflictingDefinitionError) as cm:
2288 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True)
2289 self.assertIn("dimension", str(cm.exception))
2291 # The failed transfer above leaves registry in an inconsistent
2292 # state because the run is created but then rolled back without
2293 # the collection cache being cleared. For now force a refresh.
2294 # Can remove with DM-35498.
2295 self.target_butler.registry.refresh()
2297 # Now transfer them to the second butler, including dimensions.
2298 with self.assertLogs(level=logging.DEBUG) as log_cm:
2299 transferred = self.target_butler.transfer_from(
2300 self.source_butler,
2301 source_refs,
2302 register_dataset_types=True,
2303 transfer_dimensions=True,
2304 )
2305 self.assertEqual(len(transferred), n_expected)
2306 log_output = ";".join(log_cm.output)
2308 # A ChainedDatastore will use the in-memory datastore for mexists,
2309 # so we cannot rely on the mexists log message.
2310 self.assertIn("Number of datastore records found in source", log_output)
2311 self.assertIn("Creating output run", log_output)
2313 # Do the transfer twice to ensure that it will do nothing extra.
2314 # Only do this if purge=True because it does not work for int
2315 # dataset_id.
2316 if purge:
2317 # This should not need to register dataset types.
2318 transferred = self.target_butler.transfer_from(self.source_butler, source_refs)
2319 self.assertEqual(len(transferred), n_expected)
2321 # Also do an explicit low-level transfer to trigger some
2322 # edge cases.
2323 with self.assertLogs(level=logging.DEBUG) as log_cm:
2324 self.target_butler._datastore.transfer_from(self.source_butler._datastore, source_refs)
2325 log_output = ";".join(log_cm.output)
2326 self.assertIn("no file artifacts exist", log_output)
2328 with self.assertRaises((TypeError, AttributeError)):
2329 self.target_butler._datastore.transfer_from(self.source_butler, source_refs) # type: ignore
2331 with self.assertRaises(ValueError):
2332 self.target_butler._datastore.transfer_from(
2333 self.source_butler._datastore, source_refs, transfer="split"
2334 )
2336 # Now try to get the same refs from the new butler.
2337 for ref in source_refs:
2338 if ref not in deleted:
2339 new_metric = self.target_butler.get(ref)
2340 old_metric = self.source_butler.get(ref)
2341 self.assertEqual(new_metric, old_metric)
2343 # Now prune run2 collection and create instead a CHAINED collection.
2344 # This should block the transfer.
2345 self.target_butler.removeRuns(["run2"], unstore=True)
2346 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED)
2347 with self.assertRaises(CollectionTypeError):
2348 # Re-importing the run1 datasets can be problematic if they
2349 # use integer IDs so filter those out.
2350 to_transfer = [ref for ref in source_refs if ref.run == "run2"]
2351 self.target_butler.transfer_from(self.source_butler, to_transfer)
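# A minimal sketch of the butler-to-butler transfer pattern exercised by
# assertButlerTransfers; the helper name is illustrative. With
# register_dataset_types and transfer_dimensions enabled, the target butler
# is populated with any missing dataset types and dimension records, so a
# freshly created repo can receive the datasets.
def _sketch_transfer(source: Butler, target: Butler, refs: list[DatasetRef]) -> None:
    transferred = target.transfer_from(
        source,
        refs,
        transfer="auto",
        register_dataset_types=True,
        transfer_dimensions=True,
    )
    assert len(transferred) == len(refs)  # Holds only if no artifacts were missing.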
2354class ChainedDatastoreTransfers(PosixDatastoreTransfers):
2355 """Test transfers using a chained datastore."""
2357 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
2360class NullDatastoreTestCase(unittest.TestCase):
2361 """Test that we can fall back to a null datastore."""
2363 # Need a good config to create the repo.
2364 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
2365 storageClassFactory: StorageClassFactory
2367 @classmethod
2368 def setUpClass(cls) -> None:
2369 cls.storageClassFactory = StorageClassFactory()
2370 cls.storageClassFactory.addFromConfig(cls.configFile)
2372 def setUp(self) -> None:
2373 """Create a new butler root for each test."""
2374 self.root = makeTestTempDir(TESTDIR)
2375 Butler.makeRepo(self.root, config=Config(self.configFile))
2377 def tearDown(self) -> None:
2378 removeTestTempDir(self.root)
2380 def test_fallback(self) -> None:
2381 # Read the butler config and mess with the datastore section.
2382 bad_config = Config(os.path.join(self.root, "butler.yaml"))
2383 bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore"
2385 with self.assertRaises(RuntimeError):
2386 Butler(bad_config)
2388 butler = Butler(bad_config, writeable=True, without_datastore=True)
2389 self.assertIsInstance(butler._datastore, NullDatastore)
2391 # Check that registry is working.
2392 butler.registry.registerRun("MYRUN")
2393 collections = butler.registry.queryCollections(...)
2394 self.assertIn("MYRUN", set(collections))
2396 # Create a ref.
2397 dimensions = butler.dimensions.extract([])
2398 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
2399 datasetTypeName = "metric"
2400 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2401 butler.registry.registerDatasetType(datasetType)
2402 ref = DatasetRef(datasetType, {}, run="MYRUN")
2404 # Check that datastore will complain.
2405 with self.assertRaises(FileNotFoundError):
2406 butler.get(ref)
2407 with self.assertRaises(FileNotFoundError):
2408 butler.getURI(ref)
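# A brief sketch of the registry-only fallback tested above: constructing a
# butler with without_datastore=True installs a NullDatastore, so collection
# and dataset-type bookkeeping keeps working while any artifact access
# raises. The run name is illustrative.
def _sketch_registry_only(config_file: str) -> None:
    butler = Butler(config_file, writeable=True, without_datastore=True)
    butler.registry.registerRun("SKETCH_RUN")  # Registry operations succeed.
    # butler.get(...) / butler.getURI(...) would raise FileNotFoundError here.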
2411def setup_module(module: types.ModuleType) -> None:
2412 """Set up the module for pytest."""
2413 clean_environment()
2416if __name__ == "__main__":
2417 clean_environment()
2418 unittest.main()