Coverage for tests/test_butler.py: 17%
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import logging
26import os
27import posixpath
28import unittest
29import tempfile
30import shutil
31import pickle
32import string
33import random
34import time
35import socket

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported.
        """
        return cls
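
# If moto is unavailable the fallback above makes @mock_s3 a harmless
# pass-through, so this module still imports; the S3-specific test case is
# skipped separately via @unittest.skipIf(not boto3, ...).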

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None
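
# Similarly, a missing wsgidav/cheroot leaves WsgiDAVApp as None, which the
# WebDAV-specific tests can check in order to skip themselves.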

import astropy.time
from threading import Thread
from tempfile import gettempdir
from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef, DatasetIdGenEnum
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError, ConflictingDefinitionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
                                                     unsetAwsEnvCredentials)
from lsst.daf.butler.core._butlerUri.http import _is_webdav_endpoint

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )
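
# The three positional arguments to MetricsExample above are, in order, the
# summary dict, the output dict, and the data list; the composite tests below
# read these back as the "summary", "output", and "data" components.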


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis of a
    failure that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests against different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to the "ingest" run collection, which is
        # also the collection searched when looking the datasets up again.
        run = "ingest"
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)
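        # id=None makes this an unresolved ref; butler.put() will return a
        # resolved ref carrying the registry-assigned dataset id.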

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists.  Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric,
                                             collections=this_run)

                # Can the artifacts themselves be retrieved?
                if not butler.datastore.isEphemeral:
                    root_uri = ButlerURI(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts([ref], destination,
                                                               preserve_path=preserve_path, transfer="copy")
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ButlerURI.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)

                            # When the path is not preserved there should not
                            # be any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)
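
                        # For a disassembled composite the datastore reports
                        # no primary URI and one secondary URI per component;
                        # for a monolithic dataset it is the reverse, so the
                        # artifact count must equal the total either way.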
                        primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(len(artifacts), n_uris, "Comparing expected artifacts vs actual:"
                                         f" {artifacts} vs {primary_uri} and {secondary_uris}")

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts([ref], destination,
                                                                         preserve_path=preserve_path,
                                                                         overwrite=True)
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know the run
                # collection is now empty.
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a DatasetType that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have the expected collections
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in the registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed.
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion.
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present.
        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree",
                                                      "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate a no-op.
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new tagged collection should leave it
        # findable in the original run collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(
            butler2.collections,
            CollectionSearch.fromExpression(["other"])
        )
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
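        # (i.e. a URI for a dataset that has not actually been written: the
        # datastore marks such locations with a "predicted" URI fragment, as
        # checked below).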
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single-file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both datasets came from the same single file.
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, no entries are created
        # for its components, but querying with components=True can still
        # return the component dataset types.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component", "random_data", "random_data_2"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component", "random_data", "random_data_2"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
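
        # Everything done inside the transaction context, the dimension
        # inserts and the put alike, should now have been rolled back in
        # both registry and datastore; the checks below verify that.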
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" lacks a key we know "full" must have, because
        # "limited" inherits it from the defaults instead.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""

        butler = Butler(self.tmpConfigFile, run="ingest")

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp",
                                                         "id": 1, "full_name": "det1"})

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData("exposure", {"instrument": "DummyCamComp",
                                                             "id": i, "obs_id": f"exp{i}",
                                                             "seq_num": i, "day_obs": dayobs,
                                                             "physical_filter": "d-r"})

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i,
                      "other": "metric",
                      "list": [2*x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "detector": 1, "instrument": "DummyCamComp",
                      "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo.  It does not assume a POSIX datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again.  This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local POSIX butler.
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand.  Functions
                # in the script folder are generally considered protected and
                # should not be used as public API.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f, directory=exportDir,
                                        transfer="auto", skip_dimensions=None, reuse_ids=False)
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using several transfer modes."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]
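
        # relative_to gives each artifact's path inside the datastore root;
        # the exports below are expected to reproduce these relative paths
        # under exportDir.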
        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")

    def testPruneDatasets(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Simple prune.
        butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1)

        # Put data back.
        ref1 = butler.put(metric, ref1.unresolved(), run=run1)
        ref2 = butler.put(metric, ref2.unresolved(), run=run2)
        ref3 = butler.put(metric, ref3.unresolved(), run=run1)

        # Check that deleting the datastore record ahead of time means that
        # emptying the trash will not touch the file itself.
        uri1 = butler.datastore.getURI(ref1)
        butler.datastore.bridge.moveToTrash([ref1])  # Update the dataset_location table
        butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id})
        butler.datastore.trash(ref1)
        butler.datastore.emptyTrash()
        self.assertTrue(uri1.exists())
        uri1.remove()  # Clean it up.

        # Simulate an execution butler setup by deleting the datastore
        # record but keeping the file around and trusting.
        butler.datastore.trustGetRequest = True
        uri2 = butler.datastore.getURI(ref2)
        uri3 = butler.datastore.getURI(ref3)
        self.assertTrue(uri2.exists())
        self.assertTrue(uri3.exists())

        # Remove the datastore record.
        butler.datastore.bridge.moveToTrash([ref2])  # Update the dataset_location table
        butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id})
        self.assertTrue(uri2.exists())
        butler.datastore.trash([ref2, ref3])
        # Immediate removal for the ref2 file.
        self.assertFalse(uri2.exists())
        # But ref3 has to wait for the empty.
        self.assertTrue(uri3.exists())
        butler.datastore.emptyTrash()
        self.assertFalse(uri3.exists())

        # Clear out the datasets from registry.
        butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"
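
    # In-memory datastores cannot ingest files, so disable the inherited
    # ingest test.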
    def testIngest(self):
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
1299class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
1300 """Test that a config file created by makeRepo outside of repo works."""
1302 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1304 def setUp(self):
1305 self.root = makeTestTempDir(TESTDIR)
1306 self.root2 = makeTestTempDir(TESTDIR)
1308 self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
1309 Butler.makeRepo(self.root, config=Config(self.configFile),
1310 outfile=self.tmpConfigFile)
1312 def tearDown(self):
1313 if os.path.exists(self.root2):
1314 shutil.rmtree(self.root2, ignore_errors=True)
1315 super().tearDown()
1317 def testConfigExistence(self):
1318 c = Config(self.tmpConfigFile)
1319 uri_config = ButlerURI(c["root"])
1320 uri_expected = ButlerURI(self.root, forceDirectory=True)
1321 self.assertEqual(uri_config.geturl(), uri_expected.geturl())
1322 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")
1324 def testPutGet(self):
1325 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1326 self.runPutGetTest(storageClass, "test_metric")
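# ButlerURI normalizes paths into URL form, which is why the test above
# compares geturl() results rather than raw strings. A sketch of the
# normalization relied upon (the argument is illustrative):
def _demo_uri_normalization(local_dir):
    # forceDirectory=True guarantees the URI is treated as a directory,
    # i.e. it carries a trailing slash after normalization.
    return ButlerURI(local_dir, forceDirectory=True).geturl()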
1329class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
1330    """Test makeRepo with outfile given as a directory."""
1332 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1334 def setUp(self):
1335 self.root = makeTestTempDir(TESTDIR)
1336 self.root2 = makeTestTempDir(TESTDIR)
1338 self.tmpConfigFile = self.root2
1339 Butler.makeRepo(self.root, config=Config(self.configFile),
1340 outfile=self.tmpConfigFile)
1342 def testConfigExistence(self):
1343        # Append the yaml file name, otherwise the Config constructor
1344        # cannot infer the file type.
1345 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
1346 super().testConfigExistence()
1349class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
1350    """Test makeRepo with outfile given as a URI."""
1352 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1354 def setUp(self):
1355 self.root = makeTestTempDir(TESTDIR)
1356 self.root2 = makeTestTempDir(TESTDIR)
1358 self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
1359 Butler.makeRepo(self.root, config=Config(self.configFile),
1360 outfile=self.tmpConfigFile)
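# As the three subclasses above demonstrate, makeRepo can write the repo
# config to an explicit file path, an existing directory, or a URI
# string instead of the default butler.yaml inside the repository.
# A sketch with illustrative paths:
def _demo_makerepo_outfile(repo_root, elsewhere):
    config_uri = ButlerURI(os.path.join(elsewhere, "external.yaml")).geturl()
    Butler.makeRepo(repo_root, outfile=config_uri)
    return Butler(config_uri)  # the repo is usable via the external config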
1363@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
1364@mock_s3
1365class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1366    """S3Datastore specialization of a butler; an S3 storage Datastore
1367    plus a local SQLite-backed SqlRegistry.
1368    """
1369 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
1370 fullConfigKey = None
1371 validationCanFail = True
1373 bucketName = "anybucketname"
1374 """Name of the Bucket that will be used in the tests. The name is read from
1375 the config file used with the tests during set-up.
1376 """
1378 root = "butlerRoot/"
1379    """Root repository directory used when useTempRoot is False.
1380    Otherwise the root is set during set-up to a randomly generated
1381    20-character string.
1382    """
1384 datastoreStr = [f"datastore={root}"]
1385 """Contains all expected root locations in a format expected to be
1386 returned by Butler stringification.
1387 """
1389    datastoreName = [f"FileDatastore@s3://{bucketName}/{root}"]
1390 """The expected format of the S3 Datastore string."""
1392 registryStr = "/gen3.sqlite3"
1393 """Expected format of the Registry string."""
1395 def genRoot(self):
1396        """Return a random 20-character string to serve as the root
1397        name for the temporary bucket repo.

1399        This is the equivalent of tempfile.mkdtemp: it is what
1400        self.root becomes when useTempRoot is True.
1401        """
1402 rndstr = "".join(
1403 random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
1404 )
1405 return rndstr + "/"
1407 def setUp(self):
1408 config = Config(self.configFile)
1409 uri = ButlerURI(config[".datastore.datastore.root"])
1410 self.bucketName = uri.netloc
1412 # set up some fake credentials if they do not exist
1413 self.usingDummyCredentials = setAwsEnvCredentials()
1415 if self.useTempRoot:
1416 self.root = self.genRoot()
1417 rooturi = f"s3://{self.bucketName}/{self.root}"
1418 config.update({"datastore": {"datastore": {"root": rooturi}}})
1420        # Need a local folder to store the registry database.
1421 self.reg_dir = makeTestTempDir(TESTDIR)
1422 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
1424        # Moto needs to be told which bucket we expect to exist, so create
1425        # it here (the name comes from the config, not the class attribute).
1426 s3 = boto3.resource("s3")
1427 s3.create_bucket(Bucket=self.bucketName)
1429 self.datastoreStr = f"datastore={self.root}"
1430 self.datastoreName = [f"FileDatastore@{rooturi}"]
1431 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
1432 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")
1434 def tearDown(self):
1435 s3 = boto3.resource("s3")
1436 bucket = s3.Bucket(self.bucketName)
1437 try:
1438 bucket.objects.all().delete()
1439 except botocore.exceptions.ClientError as e:
1440 if e.response["Error"]["Code"] == "404":
1441 # the key was not reachable - pass
1442 pass
1443 else:
1444 raise
1446 bucket = s3.Bucket(self.bucketName)
1447 bucket.delete()
1449 # unset any potentially set dummy credentials
1450 if self.usingDummyCredentials:
1451 unsetAwsEnvCredentials()
1453 if self.reg_dir is not None and os.path.exists(self.reg_dir):
1454 shutil.rmtree(self.reg_dir, ignore_errors=True)
1456 if self.useTempRoot and os.path.exists(self.root):
1457 shutil.rmtree(self.root, ignore_errors=True)
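# A minimal, self-contained sketch of the moto pattern used above:
# mock_s3 patches boto3 in-process, so buckets and objects exist only in
# memory. Not part of the test suite; bucket and key names are
# illustrative.
def _demo_moto_roundtrip():
    if boto3 is None:
        return None  # boto3/moto unavailable; nothing to demonstrate
    with mock_s3():
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket="demo-bucket")
        s3.Object("demo-bucket", "greeting").put(Body=b"hello")
        return s3.Object("demo-bucket", "greeting").get()["Body"].read()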
1460@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
1461# Mock required environment variables during tests
1462@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
1463 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
1464 TESTDIR, "config/testConfigs/webdav/token"),
1465 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
1466class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1467    """WebdavDatastore specialization of a butler; a WebDAV storage
1468    Datastore plus a local SQLite-backed SqlRegistry.
1469    """
1470 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
1471 fullConfigKey = None
1472 validationCanFail = True
1474 serverName = "localhost"
1475 """Name of the server that will be used in the tests.
1476 """
1478 portNumber = 8080
1479    """Port on which the webdav server listens. Automatically chosen
1480    in setUpClass via the _getfreeport() method.
1481    """
1483 root = "butlerRoot/"
1484    """Root repository directory used when useTempRoot is False.
1485    Otherwise the root is set during set-up to a randomly generated
1486    20-character string.
1487    """
1489 datastoreStr = [f"datastore={root}"]
1490 """Contains all expected root locations in a format expected to be
1491 returned by Butler stringification.
1492 """
1494    datastoreName = [f"FileDatastore@https://{serverName}/{root}"]
1495 """The expected format of the WebdavDatastore string."""
1497 registryStr = "/gen3.sqlite3"
1498 """Expected format of the Registry string."""
1500 serverThread = None
1501    """Thread in which the local webdav server will run."""
1503 stopWebdavServer = False
1504    """When True, this flag causes the webdav server to shut down
1505    gracefully.
1506    """
1508 def genRoot(self):
1509        """Return a random 20-character string to serve as the root
1510        name for the temporary repo.

1512        This is the equivalent of tempfile.mkdtemp: it is what
1513        self.root becomes when useTempRoot is True.
1514        """
1515 rndstr = "".join(
1516 random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
1517 )
1518 return rndstr + "/"
1520 @classmethod
1521 def setUpClass(cls):
1522        # Do the same as the inherited class.
1523 cls.storageClassFactory = StorageClassFactory()
1524 cls.storageClassFactory.addFromConfig(cls.configFile)
1526 cls.portNumber = cls._getfreeport()
1527 # Run a local webdav server on which tests will be run
1528 cls.serverThread = Thread(target=cls._serveWebdav,
1529 args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
1530 daemon=True)
1531 cls.serverThread.start()
1532 # Wait for it to start
1533 time.sleep(3)
1535 @classmethod
1536 def tearDownClass(cls):
1537 # Ask for graceful shut down of the webdav server
1538 cls.stopWebdavServer = True
1539 # Wait for the thread to exit
1540 cls.serverThread.join()
1542 # Mock required environment variables during tests
1543 @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
1544 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
1545 TESTDIR, "config/testConfigs/webdav/token"),
1546 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
1547 def setUp(self):
1548 config = Config(self.configFile)
1550 if self.useTempRoot:
1551 self.root = self.genRoot()
1552 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
1553 config.update({"datastore": {"datastore": {"root": self.rooturi}}})
1555        # Need a local folder to store the registry database.
1556 self.reg_dir = makeTestTempDir(TESTDIR)
1557 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
1559 self.datastoreStr = f"datastore={self.root}"
1560 self.datastoreName = [f"FileDatastore@{self.rooturi}"]
1562 if not _is_webdav_endpoint(self.rooturi):
1563 raise OSError("Webdav server not running properly: cannot run tests.")
1565 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
1566 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")
1568 # Mock required environment variables during tests
1569 @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
1570 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
1571 TESTDIR, "config/testConfigs/webdav/token"),
1572 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
1573 def tearDown(self):
1574 # Clear temporary directory
1575 ButlerURI(self.rooturi).remove()
1576 ButlerURI(self.rooturi).session.close()
1578 if self.reg_dir is not None and os.path.exists(self.reg_dir):
1579 shutil.rmtree(self.reg_dir, ignore_errors=True)
1581 if self.useTempRoot and os.path.exists(self.root):
1582 shutil.rmtree(self.root, ignore_errors=True)
1584 def _serveWebdav(self, port: int, stopWebdavServer):
1585        """Start a local webdav-compatible HTTP server listening on
1586        http://localhost:port. The server only runs while this test
1587        class is active and shuts down afterwards. It must be started
1588        in a separate thread.
1590 Parameters
1591 ----------
1592 port : `int`
1593            The port number on which the server should listen.
        stopWebdavServer : callable
            A callable returning `True` when the server should shut down.
1594        """
1595 root_path = gettempdir()
1597 config = {
1598 "host": "0.0.0.0",
1599 "port": port,
1600 "provider_mapping": {"/": root_path},
1601 "http_authenticator": {
1602 "domain_controller": None
1603 },
1604 "simple_dc": {"user_mapping": {"*": True}},
1605 "verbose": 0,
1606 }
1607 app = WsgiDAVApp(config)
1609 server_args = {
1610 "bind_addr": (config["host"], config["port"]),
1611 "wsgi_app": app,
1612 }
1613 server = wsgi.Server(**server_args)
1614 server.prepare()
1616 try:
1617 # Start the actual server in a separate thread
1618 t = Thread(target=server.serve, daemon=True)
1619 t.start()
1620 # watch stopWebdavServer, and gracefully
1621 # shut down the server when True
1622 while True:
1623 if stopWebdavServer():
1624 break
1625 time.sleep(1)
1626 except KeyboardInterrupt:
1627 print("Caught Ctrl-C, shutting down...")
1628 finally:
1629 server.stop()
1630 t.join()
    @staticmethod
1632    def _getfreeport():
1633 """
1634 Determines a free port using sockets.
1635 """
1636 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1637 free_socket.bind(('0.0.0.0', 0))
1638 free_socket.listen()
1639 port = free_socket.getsockname()[1]
1640 free_socket.close()
1641 return port
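# The server lifecycle above is controlled by a polled stop flag rather
# than by interrupting the thread. The same pattern in miniature
# (illustrative; not used by the tests):
def _demo_stoppable_worker():
    state = {"stop": False}

    def worker(should_stop):
        # Poll the flag, exactly as _serveWebdav does, until asked to stop.
        while not should_stop():
            time.sleep(0.1)

    t = Thread(target=worker, args=(lambda: state["stop"],), daemon=True)
    t.start()
    state["stop"] = True  # request a graceful shutdown
    t.join()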
1644class PosixDatastoreTransfers(unittest.TestCase):
1645 """Test data transfers between butlers.
1647    Different dataset ID managers are tested: UUID to UUID and integer
1648    to integer. UUID to integer is not supported since we do not
1649    currently want to allow that. Integer to UUID is supported, with
1650    the caveat that a UUID4 will be generated, which is incorrect for
1651    raw dataset types; the test ignores that.
1652 """
1654 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1656 @classmethod
1657 def setUpClass(cls):
1658 cls.storageClassFactory = StorageClassFactory()
1659 cls.storageClassFactory.addFromConfig(cls.configFile)
1661 def setUp(self):
1662 self.root = makeTestTempDir(TESTDIR)
1663 self.config = Config(self.configFile)
1665 def tearDown(self):
1666 removeTestTempDir(self.root)
1668 def create_butler(self, manager, label):
1669 config = Config(self.configFile)
1670 config["registry", "managers", "datasets"] = manager
1671 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config),
1672 writeable=True)
1674 def create_butlers(self, manager1, manager2):
1675 self.source_butler = self.create_butler(manager1, "1")
1676 self.target_butler = self.create_butler(manager2, "2")
1678 def testTransferUuidToUuid(self):
1679 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1680 "ByDimensionsDatasetRecordStorageManagerUUID",
1681 "lsst.daf.butler.registry.datasets.byDimensions."
1682 "ByDimensionsDatasetRecordStorageManagerUUID",
1683 )
1684 # Setting id_gen_map should have no effect here
1685 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})
1687 def testTransferIntToInt(self):
1688 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1689 "ByDimensionsDatasetRecordStorageManager",
1690 "lsst.daf.butler.registry.datasets.byDimensions."
1691 "ByDimensionsDatasetRecordStorageManager",
1692 )
1693 # int dataset ID only allows UNIQUE
1694 self.assertButlerTransfers()
1696 def testTransferIntToUuid(self):
1697 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1698 "ByDimensionsDatasetRecordStorageManager",
1699 "lsst.daf.butler.registry.datasets.byDimensions."
1700 "ByDimensionsDatasetRecordStorageManagerUUID",
1701 )
1702 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})
1704 def testTransferMissing(self):
1705 """Test transfers where datastore records are missing.
1707 This is how execution butler works.
1708 """
1709 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1710 "ByDimensionsDatasetRecordStorageManagerUUID",
1711 "lsst.daf.butler.registry.datasets.byDimensions."
1712 "ByDimensionsDatasetRecordStorageManagerUUID",
1713 )
1715 # Configure the source butler to allow trust.
1716 self.source_butler.datastore.trustGetRequest = True
1718 self.assertButlerTransfers(purge=True)
1720 def testTransferMissingDisassembly(self):
1721 """Test transfers where datastore records are missing.
1723 This is how execution butler works.
1724 """
1725 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1726 "ByDimensionsDatasetRecordStorageManagerUUID",
1727 "lsst.daf.butler.registry.datasets.byDimensions."
1728 "ByDimensionsDatasetRecordStorageManagerUUID",
1729 )
1731 # Configure the source butler to allow trust.
1732 self.source_butler.datastore.trustGetRequest = True
1734 # Test disassembly.
1735 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite")
1737 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"):
1738 """Test that a run can be transferred to another butler."""
1740 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
1741 datasetTypeName = "random_data"
1743        # The test will create 3 collections, of which we will want to
1744        # transfer two.
1745 runs = ["run1", "run2", "other"]
1747        # We also want to use two different dataset types to ensure
1748        # that grouping works.
1749 datasetTypeNames = ["random_data", "random_data_2"]
1751 # Create the run collections in the source butler.
1752 for run in runs:
1753 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
1755 # Create dimensions in both butlers (transfer will not create them).
1756 n_exposures = 30
1757 for butler in (self.source_butler, self.target_butler):
1758 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
1759 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
1760 "name": "d-r",
1761 "band": "R"})
1762 butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp",
1763 "id": 1, "full_name": "det1"})
1765 for i in range(n_exposures):
1766 butler.registry.insertDimensionData("exposure", {"instrument": "DummyCamComp",
1767 "id": i, "obs_id": f"exp{i}",
1768 "physical_filter": "d-r"})
1770 # Create dataset types in the source butler.
1771 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
1772 for datasetTypeName in datasetTypeNames:
1773 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
1774 self.source_butler.registry.registerDatasetType(datasetType)
1776 # Write a dataset to an unrelated run -- this will ensure that
1777 # we are rewriting integer dataset ids in the target if necessary.
1778 # Will not be relevant for UUID.
1779 run = "distraction"
1780 butler = Butler(butler=self.source_butler, run=run)
1781 butler.put(makeExampleMetrics(), datasetTypeName,
1782 exposure=1, detector=1, instrument="DummyCamComp", physical_filter="d-r")
1784 # Write some example metrics to the source
1785 butler = Butler(butler=self.source_butler)
1787 # Set of DatasetRefs that should be in the list of refs to transfer
1788 # but which will not be transferred.
1789 deleted = set()
1791 n_expected = 20 # Number of datasets expected to be transferred
1792 source_refs = []
1793 for i in range(n_exposures):
1794            # Put a third of the datasets into each collection; only
1795            # two thirds will be retained.
1796 index = i % 3
1797 run = runs[index]
1798 datasetTypeName = datasetTypeNames[i % 2]
1800 metric_data = {"summary": {"counter": i},
1801 "output": {"text": "metric"},
1802 "data": [2*x for x in range(i)]}
1803 metric = MetricsExample(**metric_data)
1804 dataId = {"exposure": i, "detector": 1, "instrument": "DummyCamComp", "physical_filter": "d-r"}
1805 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)
1807 # Remove the datastore record using low-level API
1808 if purge:
1809 # Remove records for a fraction.
1810 if index == 1:
1812 # For one of these delete the file as well.
1813 # This allows the "missing" code to filter the
1814 # file out.
1815 if not deleted:
1816 primary, uris = butler.datastore.getURIs(ref)
1817 if primary:
1818 primary.remove()
1819 for uri in uris.values():
1820 uri.remove()
1821 n_expected -= 1
1822 deleted.add(ref)
1824 # Remove the datastore record.
1825 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
1827 if index < 2:
1828 source_refs.append(ref)
1829 if ref not in deleted:
1830 new_metric = butler.get(ref.unresolved(), collections=run)
1831 self.assertEqual(new_metric, metric)
1833 # Create some bad dataset types to ensure we check for inconsistent
1834 # definitions.
1835 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList")
1836 for datasetTypeName in datasetTypeNames:
1837 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass)
1838 self.target_butler.registry.registerDatasetType(datasetType)
1839 with self.assertRaises(ConflictingDefinitionError):
1840 self.target_butler.transfer_from(self.source_butler, source_refs,
1841 id_gen_map=id_gen_map)
1842 # And remove the bad definitions.
1843 for datasetTypeName in datasetTypeNames:
1844 self.target_butler.registry.removeDatasetType(datasetTypeName)
1846 # Transfer without creating dataset types should fail.
1847 with self.assertRaises(KeyError):
1848 self.target_butler.transfer_from(self.source_butler, source_refs,
1849 id_gen_map=id_gen_map)
1851 # Now transfer them to the second butler
1852 with self.assertLogs(level=logging.DEBUG) as cm:
1853 transferred = self.target_butler.transfer_from(self.source_butler, source_refs,
1854 id_gen_map=id_gen_map,
1855 register_dataset_types=True)
1856 self.assertEqual(len(transferred), n_expected)
1857 log_output = ";".join(cm.output)
1858 self.assertIn("found in datastore for chunk", log_output)
1859 self.assertIn("Creating output run", log_output)
1861 # Do the transfer twice to ensure that it will do nothing extra.
1862 # Only do this if purge=True because it does not work for int
1863 # dataset_id.
1864 if purge:
1865 # This should not need to register dataset types.
1866 transferred = self.target_butler.transfer_from(self.source_butler, source_refs,
1867 id_gen_map=id_gen_map)
1868 self.assertEqual(len(transferred), n_expected)
1870 # Also do an explicit low-level transfer to trigger some
1871 # edge cases.
1872 with self.assertLogs(level=logging.DEBUG) as cm:
1873 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs)
1874 log_output = ";".join(cm.output)
1875 self.assertIn("no file artifacts exist", log_output)
1877 with self.assertRaises(TypeError):
1878 self.target_butler.datastore.transfer_from(self.source_butler, source_refs)
1880 with self.assertRaises(ValueError):
1881 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs,
1882 transfer="split")
1884 # Now try to get the same refs from the new butler.
1885 for ref in source_refs:
1886 if ref not in deleted:
1887 unresolved_ref = ref.unresolved()
1888 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run)
1889 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run)
1890 self.assertEqual(new_metric, old_metric)
1892 # Now prune run2 collection and create instead a CHAINED collection.
1893 # This should block the transfer.
1894 self.target_butler.pruneCollection("run2", purge=True, unstore=True)
1895 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED)
1896 with self.assertRaises(TypeError):
1897 # Re-importing the run1 datasets can be problematic if they
1898 # use integer IDs so filter those out.
1899 to_transfer = [ref for ref in source_refs if ref.run == "run2"]
1900 self.target_butler.transfer_from(self.source_butler, to_transfer,
1901 id_gen_map=id_gen_map)
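# The central call exercised by this test class: transfer_from copies
# registry entries and datastore artifacts for the given refs from one
# butler to another, optionally registering missing dataset types first.
# A minimal sketch, assuming `source` and `target` are writeable Butlers
# that share dimension records and `refs` are resolved DatasetRefs from
# the source (all names illustrative):
def _demo_transfer(source, target, refs):
    transferred = target.transfer_from(source, refs,
                                       register_dataset_types=True)
    return len(transferred)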
1904if __name__ == "__main__":
1905 unittest.main()