# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import logging
import os
import posixpath
import unittest
import unittest.mock
import tempfile
import shutil
import pickle
import string
import random
import time
import socket
import pathlib

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported.
        """
        return cls

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None
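
# cheroot/wsgidav are optional WebDAV server dependencies; when they are
# missing, WsgiDAVApp is left as None so WebDAV-dependent tests can detect
# the missing dependency and skip themselves.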

import astropy.time
from threading import Thread
from tempfile import gettempdir
from lsst.utils import doImport
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef, DatasetIdGenEnum
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError, ConflictingDefinitionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
                                                     unsetAwsEnvCredentials)
from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))
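

# Helper used throughout these tests: builds a small MetricsExample whose
# positional arguments are a summary mapping, a nested output mapping, and
# a list of data values.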
def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)
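
    # assertGetComponents checks each named component of a composite dataset
    # two ways: via butler.get on the component dataset type and via a
    # deferred handle on the parent ref; both must match the in-memory
    # reference object.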
    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)
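
    # runPutGetTest is the workhorse of this suite: it registers a dataset
    # type and its dimension records, then exercises put/get in several
    # calling conventions, component access, artifact retrieval, read
    # parameters, and dataset removal, returning the butler for reuse.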
    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest"
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric,
                                             collections=this_run)

                # Can the artifacts themselves be retrieved?
                if not butler.datastore.isEphemeral:
                    root_uri = ButlerURI(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts([ref], destination,
                                                               preserve_path=preserve_path, transfer="copy")
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ButlerURI.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)

                            # When the path is not preserved there should not
                            # be any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)
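
                        # The number of retrieved artifacts should match the
                        # URIs the datastore reports for this ref: one
                        # primary URI, or the component URIs when the
                        # dataset was disassembled.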
                        primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(len(artifacts), n_uris,
                                         "Comparing expected artifacts vs actual:"
                                         f" {artifacts} vs {primary_uri} and {secondary_uris}")

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts([ref], destination,
                                                                         preserve_path=preserve_path,
                                                                         overwrite=True)
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know the run collections
            # are now empty.
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present
        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(
            butler2.collections,
            CollectionSearch.fromExpression(["other"])
        )
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ButlerURI.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ,
                                              {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ButlerURI(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ,
                                      {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both datasets come from the same file.
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for components, but querying for them can still return
        # the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component", "random_data", "random_data_2"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component", "random_data", "random_data_2"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
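        # Everything inside the transaction block below, including the
        # dimension inserts and the put, must be rolled back when
        # TransactionTestError propagates out of the context manager.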
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" is missing a key we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True,
                            config=Config(self.configFile), overwrite=False)
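
    # testStringification verifies that str(butler) surfaces the expected
    # datastore and registry descriptions; the substrings checked come from
    # the datastoreStr/datastoreName/registryStr attributes that each
    # concrete test case defines.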
    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""

        butler = Butler(self.tmpConfigFile, run="ingest")

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp",
                                                         "id": 1, "full_name": "det1"})

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData("exposure", {"instrument": "DummyCamComp",
                                                             "id": i, "obs_id": f"exp{i}",
                                                             "seq_num": i, "day_obs": dayobs,
                                                             "physical_filter": "d-r"})

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i,
                      "other": "metric",
                      "list": [2*x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "detector": 1, "instrument": "DummyCamComp",
                      "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # And check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Checks if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements
                # even though there aren't any in these datasets or in the
                # database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f, directory=exportDir,
                                        transfer="auto", skip_dimensions=None, reuse_ids=False)
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # There should be nothing in the registry for either one, and the
        # datastore should not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"
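
    # The attributes above parameterize the shared tests: datastoreStr,
    # datastoreName, and registryStr are substrings expected by
    # testStringification, and fullConfigKey names a key present only in a
    # fully-expanded ("standalone") config, used by testMakeRepo.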
    def testPathConstructor(self):
        """Independent test of constructor using PathLike.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        # And again with a Path object with the butler yaml
        path = pathlib.Path(self.tmpConfigFile)
        butler = Butler(path, writeable=False)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object without the butler yaml
        # (making sure we skip it if the tmp config doesn't end
        # in butler.yaml -- which is the case for a subclass)
        if self.tmpConfigFile.endswith("butler.yaml"):
            path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
            butler = Butler(path, writeable=False)
            self.assertIsInstance(butler, Butler)

    def testExportTransferCopy(self):
        """Test local export using various file transfer modes."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                    for path in pathsInStore:
                        self.assertTrue(self.checkFileExists(exportDir, path),
                                        f"Check that mode {transfer} exported files")

    def testPruneDatasets(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Simple prune.
        butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1)

        # Put the data back.
        ref1 = butler.put(metric, ref1.unresolved(), run=run1)
        ref2 = butler.put(metric, ref2.unresolved(), run=run2)
        ref3 = butler.put(metric, ref3.unresolved(), run=run1)

        # Check that in normal mode, deleting the record will lead to
        # trash not touching the file.
        uri1 = butler.datastore.getURI(ref1)
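        # The calls below reach into datastore internals (bridge, _table) on
        # purpose: they simulate a registry/datastore mismatch that the
        # public APIs will not create.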
        butler.datastore.bridge.moveToTrash([ref1])  # Update the dataset_location table
        butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id})
        butler.datastore.trash(ref1)
        butler.datastore.emptyTrash()
        self.assertTrue(uri1.exists())
        uri1.remove()  # Clean it up.

        # Simulate execution butler setup by deleting the datastore
        # record but keeping the file around and trusting.
        butler.datastore.trustGetRequest = True
        uri2 = butler.datastore.getURI(ref2)
        uri3 = butler.datastore.getURI(ref3)
        self.assertTrue(uri2.exists())
        self.assertTrue(uri3.exists())

        # Remove the datastore record.
        butler.datastore.bridge.moveToTrash([ref2])  # Update the dataset_location table
        butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id})
        self.assertTrue(uri2.exists())
        butler.datastore.trash([ref2, ref3])
        # Immediate removal for the ref2 file...
        self.assertFalse(uri2.exists())
        # ...but ref3 has to wait for the empty.
        self.assertTrue(uri3.exists())
        butler.datastore.emptyTrash()
        self.assertFalse(uri3.exists())

        # Clear out the datasets from registry.
        butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"

    def testIngest(self):
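        # Ingest is inherently file-based, so it does not apply to a purely
        # in-memory datastore; override the inherited test to skip it.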
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler, combining an in-memory
    datastore with file datastores."""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"
1316class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
1317 """Test that a yaml file in one location can refer to a root in another."""
1319 datastoreStr = ["dir1"]
1320 # Disable the makeRepo test since we are deliberately not using
1321 # butler.yaml as the config name.
1322 fullConfigKey = None
1324 def setUp(self):
1325 self.root = makeTestTempDir(TESTDIR)
1327 # Make a new repository in one place
1328 self.dir1 = os.path.join(self.root, "dir1")
1329 Butler.makeRepo(self.dir1, config=Config(self.configFile))
1331 # Move the yaml file to a different place and add a "root"
1332 self.dir2 = os.path.join(self.root, "dir2")
1333 os.makedirs(self.dir2, exist_ok=True)
1334 configFile1 = os.path.join(self.dir1, "butler.yaml")
1335 config = Config(configFile1)
1336 config["root"] = self.dir1
1337 configFile2 = os.path.join(self.dir2, "butler2.yaml")
1338 config.dumpToUri(configFile2)
1339 os.remove(configFile1)
1340 self.tmpConfigFile = configFile2
1342 def testFileLocations(self):
1343 self.assertNotEqual(self.dir1, self.dir2)
1344 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
1345 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
1346 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
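    def testButlerFromRelocatedConfig(self):
        # A minimal sketch added for illustration (not part of the original
        # suite): the relocated butler2.yaml should be directly usable, and
        # the resulting Butler should point back at the datastore root in
        # dir1, which is what the datastoreStr = ["dir1"] convention above
        # relies on via Butler stringification.
        butler = Butler(self.tmpConfigFile)
        self.assertIn("dir1", str(butler))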
1349class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
1350 """Test that a config file created by makeRepo outside of repo works."""
1352 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1354 def setUp(self):
1355 self.root = makeTestTempDir(TESTDIR)
1356 self.root2 = makeTestTempDir(TESTDIR)
1358 self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
1359 Butler.makeRepo(self.root, config=Config(self.configFile),
1360 outfile=self.tmpConfigFile)
1362 def tearDown(self):
1363 if os.path.exists(self.root2):
1364 shutil.rmtree(self.root2, ignore_errors=True)
1365 super().tearDown()
1367 def testConfigExistence(self):
1368 c = Config(self.tmpConfigFile)
1369 uri_config = ButlerURI(c["root"])
1370 uri_expected = ButlerURI(self.root, forceDirectory=True)
1371 self.assertEqual(uri_config.geturl(), uri_expected.geturl())
1372 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")
1374 def testPutGet(self):
1375 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1376 self.runPutGetTest(storageClass, "test_metric")
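# For illustration (hypothetical paths), the pattern this class tests:
# makeRepo writes the repository under one directory while emitting the
# config elsewhere, and a Butler can then be built from that external
# config because its "root" key records the real repository location.
#
#     Butler.makeRepo("/data/repo", outfile="/configs/different.yaml")
#     butler = Butler("/configs/different.yaml")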
1379class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
1380 """Test that a config file created by makeRepo outside of repo works."""
1382 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1384 def setUp(self):
1385 self.root = makeTestTempDir(TESTDIR)
1386 self.root2 = makeTestTempDir(TESTDIR)
1388 self.tmpConfigFile = self.root2
1389 Butler.makeRepo(self.root, config=Config(self.configFile),
1390 outfile=self.tmpConfigFile)
1392 def testConfigExistence(self):
1393 # Append the yaml file name, otherwise the Config constructor cannot
1394 # determine the file format.
1395 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
1396 super().testConfigExistence()
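# Note (added): when outfile is a directory, makeRepo writes a butler.yaml
# inside that directory, hence the os.path.join above before loading the
# Config.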
1399class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
1400 """Test that a config file created by makeRepo outside of repo works."""
1402 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1404 def setUp(self):
1405 self.root = makeTestTempDir(TESTDIR)
1406 self.root2 = makeTestTempDir(TESTDIR)
1408 self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
1409 Butler.makeRepo(self.root, config=Config(self.configFile),
1410 outfile=self.tmpConfigFile)
1413@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
1414@mock_s3
1415class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1416 """S3Datastore specialization of a butler; an S3 storage Datastore +
1417 a local in-memory SqlRegistry.
1418 """
1419 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
1420 fullConfigKey = None
1421 validationCanFail = True
1423 bucketName = "anybucketname"
1424 """Name of the Bucket that will be used in the tests. The name is read from
1425 the config file used with the tests during set-up.
1426 """
1428 root = "butlerRoot/"
1429 """Root repository directory expected to be used in case useTempRoot=False.
1430 Otherwise the root is set to a 20 characters long randomly generated string
1431 during set-up.
1432 """
1434 datastoreStr = [f"datastore={root}"]
1435 """Contains all expected root locations in a format expected to be
1436 returned by Butler stringification.
1437 """
1439 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
1440 """The expected format of the S3 Datastore string."""
1442 registryStr = "/gen3.sqlite3"
1443 """Expected format of the Registry string."""
1445 def genRoot(self):
1446 """Returns a random string of len 20 to serve as a root
1447 name for the temporary bucket repo.
1449 This is equivalent to tempfile.mkdtemp as this is what self.root
1450 becomes when useTempRoot is True.
1451 """
1452 rndstr = "".join(
1453 random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
1454 )
1455 return rndstr + "/"
1457 def setUp(self):
1458 config = Config(self.configFile)
1459 uri = ButlerURI(config[".datastore.datastore.root"])
1460 self.bucketName = uri.netloc
1462 # set up some fake credentials if they do not exist
1463 self.usingDummyCredentials = setAwsEnvCredentials()
1465 if self.useTempRoot:
1466 self.root = self.genRoot()
1467 rooturi = f"s3://{self.bucketName}/{self.root}"
1468 config.update({"datastore": {"datastore": {"root": rooturi}}})
1470 # need local folder to store registry database
1471 self.reg_dir = makeTestTempDir(TESTDIR)
1472 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
1474 # Moto needs to be told that we expect the bucket to exist
1475 # (its name used to be the class attribute bucketName).
1476 s3 = boto3.resource("s3")
1477 s3.create_bucket(Bucket=self.bucketName)
1479 self.datastoreStr = f"datastore={self.root}"
1480 self.datastoreName = [f"FileDatastore@{rooturi}"]
1481 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
1482 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")
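# A standalone sketch (names illustrative) of the moto pattern this class
# relies on: the @mock_s3 decorator redirects boto3 to an in-memory S3,
# and any bucket must be created in the mock before it can be used.
#
#     @mock_s3
#     class MyS3Test(unittest.TestCase):
#         def setUp(self):
#             boto3.resource("s3").create_bucket(Bucket="anybucketname")
#             # subsequent boto3/S3 calls all hit the in-memory mock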
1484 def tearDown(self):
1485 s3 = boto3.resource("s3")
1486 bucket = s3.Bucket(self.bucketName)
1487 try:
1488 bucket.objects.all().delete()
1489 except botocore.exceptions.ClientError as e:
1490 if e.response["Error"]["Code"] == "404":
1491 # the key was not reachable - pass
1492 pass
1493 else:
1494 raise
1496 bucket = s3.Bucket(self.bucketName)
1497 bucket.delete()
1499 # unset any potentially set dummy credentials
1500 if self.usingDummyCredentials:
1501 unsetAwsEnvCredentials()
1503 if self.reg_dir is not None and os.path.exists(self.reg_dir):
1504 shutil.rmtree(self.reg_dir, ignore_errors=True)
1506 if self.useTempRoot and os.path.exists(self.root):
1507 shutil.rmtree(self.root, ignore_errors=True)
1510@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
1511# Mock required environment variables during tests
1512@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
1513 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
1514 TESTDIR, "config/testConfigs/webdav/token"),
1515 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
1516class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1517 """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
1518 a local in-memory SqlRegistry.
1519 """
1520 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
1521 fullConfigKey = None
1522 validationCanFail = True
1524 serverName = "localhost"
1525 """Name of the server that will be used in the tests.
1526 """
1528 portNumber = 8080
1529 """Port on which the webdav server listens. Automatically chosen
1530 in setUpClass via the _getfreeport() method.
1531 """
1533 root = "butlerRoot/"
1534 """Root repository directory expected to be used in case useTempRoot=False.
1535 Otherwise the root is set to a 20 characters long randomly generated string
1536 during set-up.
1537 """
1539 datastoreStr = [f"datastore={root}"]
1540 """Contains all expected root locations in a format expected to be
1541 returned by Butler stringification.
1542 """
1544 datastoreName = ["FileDatastore@https://{serverName}/{root}"]
1545 """The expected format of the WebdavDatastore string."""
1547 registryStr = "/gen3.sqlite3"
1548 """Expected format of the Registry string."""
1550 serverThread = None
1551 """Thread in which the local webdav server will run"""
1553 stopWebdavServer = False
1554 """This flag will cause the webdav server to
1555 gracefully shut down when True
1556 """
1558 def genRoot(self):
1559 """Returns a random string of len 20 to serve as a root
1560 name for the temporary bucket repo.
1562 This is equivalent to tempfile.mkdtemp as this is what self.root
1563 becomes when useTempRoot is True.
1564 """
1565 rndstr = "".join(
1566 random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
1567 )
1568 return rndstr + "/"
1570 @classmethod
1571 def setUpClass(cls):
1572 # Do the same as inherited class
1573 cls.storageClassFactory = StorageClassFactory()
1574 cls.storageClassFactory.addFromConfig(cls.configFile)
1576 cls.portNumber = cls._getfreeport()
1577 # Run a local webdav server on which tests will be run
1578 cls.serverThread = Thread(target=cls._serveWebdav,
1579 args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
1580 daemon=True)
1581 cls.serverThread.start()
1582 # Wait for it to start
1583 time.sleep(3)
1585 @classmethod
1586 def tearDownClass(cls):
1587 # Ask for graceful shut down of the webdav server
1588 cls.stopWebdavServer = True
1589 # Wait for the thread to exit
1590 cls.serverThread.join()
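# Note (added): the stop condition is handed to the server thread as the
# zero-argument callable "lambda: cls.stopWebdavServer" rather than as a
# plain value, so _serveWebdav re-reads the class attribute on every loop
# iteration and observes the flag being flipped in tearDownClass.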
1592 # Mock required environment variables during tests
1593 @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
1594 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
1595 TESTDIR, "config/testConfigs/webdav/token"),
1596 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
1597 def setUp(self):
1598 config = Config(self.configFile)
1600 if self.useTempRoot:
1601 self.root = self.genRoot()
1602 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
1603 config.update({"datastore": {"datastore": {"root": self.rooturi}}})
1605 # need local folder to store registry database
1606 self.reg_dir = makeTestTempDir(TESTDIR)
1607 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
1609 self.datastoreStr = f"datastore={self.root}"
1610 self.datastoreName = [f"FileDatastore@{self.rooturi}"]
1612 if not isWebdavEndpoint(self.rooturi):
1613 raise OSError("Webdav server not running properly: cannot run tests.")
1615 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
1616 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")
1618 # Mock required environment variables during tests
1619 @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
1620 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
1621 TESTDIR, "config/testConfigs/webdav/token"),
1622 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
1623 def tearDown(self):
1624 # Clear temporary directory
1625 ButlerURI(self.rooturi).remove()
1626 ButlerURI(self.rooturi).session.close()
1628 if self.reg_dir is not None and os.path.exists(self.reg_dir):
1629 shutil.rmtree(self.reg_dir, ignore_errors=True)
1631 if self.useTempRoot and os.path.exists(self.root):
1632 shutil.rmtree(self.root, ignore_errors=True)
1634 def _serveWebdav(self, port: int, stopWebdavServer):
1635 """Starts a local webdav-compatible HTTP server,
1636 Listening on http://localhost:port
1637 This server only runs when this test class is instantiated,
1638 and then shuts down. Must be started is a separate thread.
1640 Parameters
1641 ----------
1642 port : `int`
1643 The port number on which the server should listen.
1644 """
1645 root_path = gettempdir()
1647 config = {
1648 "host": "0.0.0.0",
1649 "port": port,
1650 "provider_mapping": {"/": root_path},
1651 "http_authenticator": {
1652 "domain_controller": None
1653 },
1654 "simple_dc": {"user_mapping": {"*": True}},
1655 "verbose": 0,
1656 }
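# Comment added for clarity: the config above serves the system temp
# directory as the WebDAV root, disables real authentication
# (domain_controller=None with a simple_dc that accepts any user), and
# silences request logging (verbose=0).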
1657 app = WsgiDAVApp(config)
1659 server_args = {
1660 "bind_addr": (config["host"], config["port"]),
1661 "wsgi_app": app,
1662 }
1663 server = wsgi.Server(**server_args)
1664 server.prepare()
1666 try:
1667 # Start the actual server in a separate thread
1668 t = Thread(target=server.serve, daemon=True)
1669 t.start()
1670 # Watch stopWebdavServer, and gracefully
1671 # shut down the server when it returns True.
1672 while True:
1673 if stopWebdavServer():
1674 break
1675 time.sleep(1)
1676 except KeyboardInterrupt:
1677 print("Caught Ctrl-C, shutting down...")
1678 finally:
1679 server.stop()
1680 t.join()
1682 def _getfreeport():
1683 """
1684 Determines a free port using sockets.
1685 """
1686 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1687 free_socket.bind(('0.0.0.0', 0))
1688 free_socket.listen()
1689 port = free_socket.getsockname()[1]
1690 free_socket.close()
1691 return port
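# Notes (added): binding to port 0 asks the OS for an arbitrary free
# ephemeral port; the socket is closed before returning, so there is a
# small race window until setUpClass rebinds the port. _getfreeport is
# written without "self" and is only ever accessed through the class
# (cls._getfreeport()), so no instance binding takes place; decorating it
# with @staticmethod would express the same intent more idiomatically.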
1694class PosixDatastoreTransfers(unittest.TestCase):
1695 """Test data transfers between butlers.
1697 Different dataset-ID managers are exercised: UUID to UUID and integer
1698 to integer are tested. UUID to integer is not supported since we do
1699 not currently want to allow that. Integer to UUID is supported, with
1700 the caveat that UUID4 IDs will be generated, which would be incorrect
1701 for raw dataset types; the tests ignore that.
1702 """
1704 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1706 @classmethod
1707 def setUpClass(cls):
1708 cls.storageClassFactory = StorageClassFactory()
1709 cls.storageClassFactory.addFromConfig(cls.configFile)
1711 def setUp(self):
1712 self.root = makeTestTempDir(TESTDIR)
1713 self.config = Config(self.configFile)
1715 def tearDown(self):
1716 removeTestTempDir(self.root)
1718 def create_butler(self, manager, label):
1719 config = Config(self.configFile)
1720 config["registry", "managers", "datasets"] = manager
1721 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config),
1722 writeable=True)
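# For illustration: the dataset-ID manager is selected by overriding a
# single registry config key before the repo is created, e.g.
#
#     config["registry", "managers", "datasets"] = (
#         "lsst.daf.butler.registry.datasets.byDimensions"
#         ".ByDimensionsDatasetRecordStorageManagerUUID")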
1724 def create_butlers(self, manager1, manager2):
1725 self.source_butler = self.create_butler(manager1, "1")
1726 self.target_butler = self.create_butler(manager2, "2")
1728 def testTransferUuidToUuid(self):
1729 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1730 "ByDimensionsDatasetRecordStorageManagerUUID",
1731 "lsst.daf.butler.registry.datasets.byDimensions."
1732 "ByDimensionsDatasetRecordStorageManagerUUID",
1733 )
1734 # Setting id_gen_map should have no effect here
1735 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})
1737 def testTransferIntToInt(self):
1738 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1739 "ByDimensionsDatasetRecordStorageManager",
1740 "lsst.daf.butler.registry.datasets.byDimensions."
1741 "ByDimensionsDatasetRecordStorageManager",
1742 )
1743 # Integer dataset IDs only allow UNIQUE ID generation.
1744 self.assertButlerTransfers()
1746 def testTransferIntToUuid(self):
1747 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1748 "ByDimensionsDatasetRecordStorageManager",
1749 "lsst.daf.butler.registry.datasets.byDimensions."
1750 "ByDimensionsDatasetRecordStorageManagerUUID",
1751 )
1752 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})
1754 def testTransferMissing(self):
1755 """Test transfers where datastore records are missing.
1757 This is how execution butler works.
1758 """
1759 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1760 "ByDimensionsDatasetRecordStorageManagerUUID",
1761 "lsst.daf.butler.registry.datasets.byDimensions."
1762 "ByDimensionsDatasetRecordStorageManagerUUID",
1763 )
1765 # Configure the source butler to allow trust.
1766 self.source_butler.datastore.trustGetRequest = True
1768 self.assertButlerTransfers(purge=True)
1770 def testTransferMissingDisassembly(self):
1771 """Test transfers where datastore records are missing.
1773 This is how execution butler works.
1774 """
1775 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1776 "ByDimensionsDatasetRecordStorageManagerUUID",
1777 "lsst.daf.butler.registry.datasets.byDimensions."
1778 "ByDimensionsDatasetRecordStorageManagerUUID",
1779 )
1781 # Configure the source butler to allow trust.
1782 self.source_butler.datastore.trustGetRequest = True
1784 # Test disassembly.
1785 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite")
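# Note (added): trustGetRequest=True lets the source datastore serve
# artifacts for which it no longer holds records, mimicking an execution
# butler whose datastore records were stripped; the transfer code then
# has to check file existence itself to decide what can be copied.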
1787 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"):
1788 """Test that a run can be transferred to another butler."""
1790 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
1791 datasetTypeName = "random_data"
1793 # The test will create 3 collections, and we will want to transfer
1794 # datasets from two of those three.
1795 runs = ["run1", "run2", "other"]
1797 # We also want to use two different dataset types to ensure that
1798 # grouping by dataset type works.
1799 datasetTypeNames = ["random_data", "random_data_2"]
1801 # Create the run collections in the source butler.
1802 for run in runs:
1803 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
1805 # Create dimensions in both butlers (transfer will not create them).
1806 n_exposures = 30
1807 for butler in (self.source_butler, self.target_butler):
1808 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
1809 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
1810 "name": "d-r",
1811 "band": "R"})
1812 butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp",
1813 "id": 1, "full_name": "det1"})
1815 for i in range(n_exposures):
1816 butler.registry.insertDimensionData("exposure", {"instrument": "DummyCamComp",
1817 "id": i, "obs_id": f"exp{i}",
1818 "physical_filter": "d-r"})
1820 # Create dataset types in the source butler.
1821 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
1822 for datasetTypeName in datasetTypeNames:
1823 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
1824 self.source_butler.registry.registerDatasetType(datasetType)
1826 # Write a dataset to an unrelated run; this ensures that integer
1827 # dataset IDs are rewritten in the target if necessary.
1828 # This is not relevant for UUIDs.
1829 run = "distraction"
1830 butler = Butler(butler=self.source_butler, run=run)
1831 butler.put(makeExampleMetrics(), datasetTypeName,
1832 exposure=1, detector=1, instrument="DummyCamComp", physical_filter="d-r")
1834 # Write some example metrics to the source
1835 butler = Butler(butler=self.source_butler)
1837 # Set of DatasetRefs that should be in the list of refs to transfer
1838 # but which will not be transferred.
1839 deleted = set()
1841 n_expected = 20 # Number of datasets expected to be transferred
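# (Arithmetic note, added: 30 exposures are spread evenly over the three
# runs; only refs landing in runs[0] and runs[1] are retained, giving
# 20 candidates. When purge=True, n_expected is decremented below for the
# one retained ref whose file artifact is deliberately deleted.)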
1842 source_refs = []
1843 for i in range(n_exposures):
1844 # Put a third of the datasets into each collection; only retain
1845 # refs for two thirds of them.
1846 index = i % 3
1847 run = runs[index]
1848 datasetTypeName = datasetTypeNames[i % 2]
1850 metric_data = {"summary": {"counter": i},
1851 "output": {"text": "metric"},
1852 "data": [2*x for x in range(i)]}
1853 metric = MetricsExample(**metric_data)
1854 dataId = {"exposure": i, "detector": 1, "instrument": "DummyCamComp", "physical_filter": "d-r"}
1855 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)
1857 # Remove the datastore record using low-level API
1858 if purge:
1859 # Remove records for a fraction.
1860 if index == 1:
1862 # For one of these delete the file as well.
1863 # This allows the "missing" code to filter the
1864 # file out.
1865 if not deleted:
1866 primary, uris = butler.datastore.getURIs(ref)
1867 if primary:
1868 primary.remove()
1869 for uri in uris.values():
1870 uri.remove()
1871 n_expected -= 1
1872 deleted.add(ref)
1874 # Remove the datastore record.
1875 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
1877 if index < 2:
1878 source_refs.append(ref)
1879 if ref not in deleted:
1880 new_metric = butler.get(ref.unresolved(), collections=run)
1881 self.assertEqual(new_metric, metric)
1883 # Create some bad dataset types to ensure we check for inconsistent
1884 # definitions.
1885 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList")
1886 for datasetTypeName in datasetTypeNames:
1887 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass)
1888 self.target_butler.registry.registerDatasetType(datasetType)
1889 with self.assertRaises(ConflictingDefinitionError):
1890 self.target_butler.transfer_from(self.source_butler, source_refs,
1891 id_gen_map=id_gen_map)
1892 # And remove the bad definitions.
1893 for datasetTypeName in datasetTypeNames:
1894 self.target_butler.registry.removeDatasetType(datasetTypeName)
1896 # Transfer without creating dataset types should fail.
1897 with self.assertRaises(KeyError):
1898 self.target_butler.transfer_from(self.source_butler, source_refs,
1899 id_gen_map=id_gen_map)
1901 # Now transfer them to the second butler
1902 with self.assertLogs(level=logging.DEBUG) as cm:
1903 transferred = self.target_butler.transfer_from(self.source_butler, source_refs,
1904 id_gen_map=id_gen_map,
1905 register_dataset_types=True)
1906 self.assertEqual(len(transferred), n_expected)
1907 log_output = ";".join(cm.output)
1908 self.assertIn("found in datastore for chunk", log_output)
1909 self.assertIn("Creating output run", log_output)
1911 # Do the transfer twice to ensure that it will do nothing extra.
1912 # Only do this if purge=True because it does not work for int
1913 # dataset_id.
1914 if purge:
1915 # This should not need to register dataset types.
1916 transferred = self.target_butler.transfer_from(self.source_butler, source_refs,
1917 id_gen_map=id_gen_map)
1918 self.assertEqual(len(transferred), n_expected)
1920 # Also do an explicit low-level transfer to trigger some
1921 # edge cases.
1922 with self.assertLogs(level=logging.DEBUG) as cm:
1923 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs)
1924 log_output = ";".join(cm.output)
1925 self.assertIn("no file artifacts exist", log_output)
1927 with self.assertRaises(TypeError):
1928 self.target_butler.datastore.transfer_from(self.source_butler, source_refs)
1930 with self.assertRaises(ValueError):
1931 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs,
1932 transfer="split")
1934 # Now try to get the same refs from the new butler.
1935 for ref in source_refs:
1936 if ref not in deleted:
1937 unresolved_ref = ref.unresolved()
1938 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run)
1939 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run)
1940 self.assertEqual(new_metric, old_metric)
1942 # Now prune the run2 collection and instead create a CHAINED
1943 # collection with the same name. This should block the transfer.
1944 self.target_butler.pruneCollection("run2", purge=True, unstore=True)
1945 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED)
1946 with self.assertRaises(TypeError):
1947 # Re-importing the run1 datasets can be problematic if they
1948 # use integer IDs, so filter those out.
1949 to_transfer = [ref for ref in source_refs if ref.run == "run2"]
1950 self.target_butler.transfer_from(self.source_butler, to_transfer,
1951 id_gen_map=id_gen_map)
1954 if __name__ == "__main__":
1955 unittest.main()