# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import os
import posixpath
import unittest
import tempfile
import shutil
import pickle
import string
import random
import numpy as np

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported.
        """
        return cls
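
# Note: @mock_s3 (or the no-op stand-in defined above) decorates
# S3DatastoreButlerTestCase at the bottom of this file.  When moto is
# unavailable, the @unittest.skipIf(not boto3, ...) guard skips those tests
# anyway, so the stand-in only needs to keep this module importable.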

from lsst.utils import doImport
from lsst.daf.butler.core.safeFileIo import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler.registry import MissingCollectionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core.location import ButlerURI
from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
                                          unsetAwsEnvCredentials)

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )
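
# For reference (inferred from usage in this file rather than from the
# MetricsExample API itself): the three constructor arguments above surface
# as the attributes .summary, .output and .data, which assertGetComponents()
# and the parameter-slicing checks in runPutGetTest() rely on.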


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests of ButlerConfig behavior that is not covered by the
    other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Constructing the Butler should have registered the run and tag
        # collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            # ref.components will only be populated in certain cases
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            if ref.components:
                self.assertTrue(butler.datastore.exists(ref.components["summary"]))
                self.assertEqual(compRef, ref.components["summary"])
                self.assertTrue(butler.datastore.exists(ref.components["data"]))
            else:
                self.assertTrue(compRef.hasParentId)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runPutGetTest(storageClass, "test_metric_comp")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single-file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are created
        # for each component.  We need entries for each component in the test
        # configuration, otherwise validation won't work.  The ones that are
        # deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" is missing a key we know it should be
        # inheriting from defaults, and that "full" has it.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.  For PosixDatastore this test is
        equivalent to an `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": np.int64(423)}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions.  This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets.  We use TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the
        # file object) from any of tempfile's temporary-file context
        # managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-export, add
            # tests for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest/run")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))


class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # In-memory datastores cannot ingest files, so disable the inherited
        # ingest test.
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToFile(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo in a directory outside of
    the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file name, else the Config constructor does not
        # know the file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo at a URI outside of the
    repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests.  The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"
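
    # For illustration only: genRoot() might return e.g.
    # "A1B2C3D4E5F6G7H8I9J0/" - the trailing slash is kept so the value can
    # be joined directly into the s3://bucket/root URI built in setUp().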

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # Moto needs to know that we expect the bucket named bucketName to
        # exist (this used to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable - pass.
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.  For S3Datastore this test is
        equivalent to a `lsst.daf.butler.core.s3utils.s3CheckFileExists`
        call.
        """
        uri = ButlerURI(root)
        uri.updateFile(relpath)
        return s3CheckFileExists(uri)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        super().testImportExport()


if __name__ == "__main__":
    unittest.main()