# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

import gc
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock  # Needed for unittest.mock.patch.dict in testConstructor.

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported."""
        return cls


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))

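# Build a small MetricsExample payload (a summary mapping, an output mapping,
# and a data list) shared by the put/get tests below.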

def makeExampleMetrics():
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests of ButlerConfig behavior not covered by any other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class providing a suite of put/get tests to run against
    different butler configurations."""

    root = None
    # Deliberately non-ASCII, to exercise Unicode-safe run name handling.
    default_run = "ingésτ😺"

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

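    # Verify that each named component of ``reference`` can be read back both
    # through butler.get() on the component dataset type and through a
    # deferred handle obtained from the composite ref.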

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

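    # Construct a writeable butler with one registered dataset type plus the
    # instrument, filter, and visit dimension records the put/get tests need.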

    def create_butler(self, run, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

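    # Core put/get exercise: store the dataset via a DatasetRef, via dataset
    # type name plus dataId, and via a DatasetType plus dataId; read it back
    # with every get variant; then check artifact retrieval, read parameters,
    # component access, and the expected failure modes.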

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                # Can the artifacts themselves be retrieved?
                if not butler.datastore.isEphemeral:
                    root_uri = ResourcePath(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)

                            # When path is not preserved there should not be
                            # any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know they are
                # empty
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler

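    # A butler constructed without a run or collections should still support
    # put and get when the run or collections are passed per call.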

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Registering it a second time is allowed but indicates a no-op.
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original run collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self):
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.getDirect(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDirectDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDirectDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self):
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.getDirect(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.getDirect(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.getDirect(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the wrong dataset type definition using get()
        # rather than getDirect(). This should be consistent with getDirect()
        # behavior and return the type of the DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

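    # Ingest files into the repo: first one file per dataset with a "copy"
    # transfer, then a single file shared by two datasets with "move",
    # verifying URIs and that removal of one dataset leaves the other intact.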

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            butler.ingest(*datasets, transfer="move", record_validation_info=False)
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

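    # Walk pruneCollection() through RUN, TAGGED, and CHAINED collections,
    # checking registry contents and datastore existence after each step.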

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        existence = butler.datastore.knows_these([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.knows(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

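    # Register several dataset types, check that component dataset types can
    # be derived from their parents, and validate the butler configuration.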

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are not created
        # for components but querying them can return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

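    # A raised exception must roll back all registry inserts and datastore
    # writes made inside the butler.transaction() context.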

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

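    # File templates control the datastore paths produced by put(); also
    # check template error handling and non-unique-filename validation.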

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

1311 def runImportExportTest(self, storageClass): 

1312 """This test does an export to a temp directory and an import back 

1313 into a new temp directory repo. It does not assume a POSIX datastore."""

1314 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1315 # Test that the repo actually has at least one dataset. 

1316 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1317 self.assertGreater(len(datasets), 0) 

1318 # Add a DimensionRecord that's unused by those datasets. 

1319 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")} 

1320 exportButler.registry.insertDimensionData("skymap", skymapRecord) 

1321 # Export and then import datasets. 

1322 with safeTestTempDir(TESTDIR) as exportDir: 

1323 exportFile = os.path.join(exportDir, "exports.yaml") 

1324 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export: 

1325 export.saveDatasets(datasets) 

1326 # Export the same datasets again. This should quietly do 

1327 # nothing because of internal deduplication, and it shouldn't 

1328 # complain about being asked to export the "htm7" elements even 

1329 # though there aren't any in these datasets or in the database. 

1330 export.saveDatasets(datasets, elements=["htm7"]) 

1331 # Save one of the data IDs again; this should be harmless 

1332 # because of internal deduplication. 

1333 export.saveDataIds([datasets[0].dataId]) 

1334 # Save some dimension records directly. 

1335 export.saveDimensionData("skymap", [skymapRecord]) 

1336 self.assertTrue(os.path.exists(exportFile)) 

1337 with safeTestTempDir(TESTDIR) as importDir: 

1338 # We always want this to be a local posix butler 

1339 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml"))) 

1340 # Calling script.butlerImport tests the implementation of the 

1341 # butler command line interface "import" subcommand. Functions 

1342 # in the script folder are generally considered protected and 

1343 # should not be used as public api. 

1344 with open(exportFile, "r") as f: 

1345 script.butlerImport( 

1346 importDir, 

1347 export_file=f, 

1348 directory=exportDir, 

1349 transfer="auto", 

1350 skip_dimensions=None, 

1351 reuse_ids=False, 

1352 ) 

1353 importButler = Butler(importDir, run=self.default_run) 

1354 for ref in datasets: 

1355 with self.subTest(ref=ref): 

1356 # Test for existence by passing in the DatasetType and 

1357 # data ID separately, to avoid lookup by dataset_id. 

1358 self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId)) 

1359 self.assertEqual( 

1360 list(importButler.registry.queryDimensionRecords("skymap")), 

1361 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)], 

1362 ) 

1363 
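The round trip tested above reduces to a small pattern. A minimal sketch of the public-API equivalent (placeholders: `src_butler`, `refs`, `skymap_record`, `export_dir`, `import_dir`), using `Butler.import_` directly instead of the protected `script.butlerImport`:

```python
import os

from lsst.daf.butler import Butler

export_file = os.path.join(export_dir, "exports.yaml")

# Export dataset definitions, their file artifacts, and any extra
# dimension records into export_dir.
with src_butler.export(filename=export_file, directory=export_dir,
                       transfer="auto") as export:
    export.saveDatasets(refs)
    export.saveDimensionData("skymap", [skymap_record])

# Import everything into a freshly created repository.
Butler.makeRepo(import_dir)
dest_butler = Butler(import_dir, writeable=True)
dest_butler.import_(filename=export_file, directory=export_dir,
                    transfer="auto")
```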

1364 def testRemoveRuns(self): 

1365 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1366 butler = Butler(self.tmpConfigFile, writeable=True) 

1367 # Load registry data with dimensions to hang datasets off of. 

1368 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1369 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1370 # Add some RUN-type collections.

1371 run1 = "run1" 

1372 butler.registry.registerRun(run1) 

1373 run2 = "run2" 

1374 butler.registry.registerRun(run2) 

1375 # Put a dataset in each run.

1376 metric = makeExampleMetrics() 

1377 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1378 datasetType = self.addDatasetType( 

1379 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1380 ) 

1381 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1382 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1383 uri1 = butler.getURI(ref1, collections=[run1]) 

1384 uri2 = butler.getURI(ref2, collections=[run2]) 

1385 # Remove from both runs with different values for unstore. 

1386 butler.removeRuns([run1], unstore=True) 

1387 butler.removeRuns([run2], unstore=False) 

1388 # Should be nothing in registry for either one, and datastore should 

1389 # not think either exists. 

1390 with self.assertRaises(MissingCollectionError): 

1391 butler.registry.getCollectionType(run1) 

1392 with self.assertRaises(MissingCollectionError): 

1393 butler.registry.getCollectionType(run2) 

1394 self.assertFalse(butler.datastore.exists(ref1)) 

1395 self.assertFalse(butler.datastore.exists(ref2)) 

1396 # The ref we unstored should be gone according to the URI, but the 

1397 # one we forgot should still be around. 

1398 self.assertFalse(uri1.exists()) 

1399 self.assertTrue(uri2.exists()) 

1400 
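For reference, the `unstore` flag is the whole point of this test: it controls whether file artifacts disappear along with the run. A minimal sketch, assuming a writeable `butler` with runs `run1` and `run2`:

```python
# unstore=True removes the run from the registry *and* deletes the
# datastore artifacts, so any previously obtained URI stops existing.
butler.removeRuns(["run1"], unstore=True)

# unstore=False removes the run from the registry and makes the
# datastore forget the datasets, but leaves the files on disk.
butler.removeRuns(["run2"], unstore=False)
```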

1401 

1402class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1403 """PosixDatastore specialization of a butler""" 

1404 

1405 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1406 fullConfigKey = ".datastore.formatters" 

1407 validationCanFail = True 

1408 datastoreStr = ["/tmp"] 

1409 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1410 registryStr = "/gen3.sqlite3" 

1411 

1412 def testPathConstructor(self): 

1413 """Independent test of constructor using PathLike.""" 

1414 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1415 self.assertIsInstance(butler, Butler) 

1416 

1417 # And again with a Path object with the butler yaml 

1418 path = pathlib.Path(self.tmpConfigFile) 

1419 butler = Butler(path, writeable=False) 

1420 self.assertIsInstance(butler, Butler) 

1421 

1422 # And again with a Path object without the butler yaml 

1423 # (making sure we skip it if the tmp config doesn't end 

1424 # in butler.yaml -- which is the case for a subclass) 

1425 if self.tmpConfigFile.endswith("butler.yaml"): 

1426 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1427 butler = Butler(path, writeable=False) 

1428 self.assertIsInstance(butler, Butler) 

1429 

1430 def testExportTransferCopy(self): 

1431 """Test local export using all transfer modes""" 

1432 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1433 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1434 # Test that the repo actually has at least one dataset. 

1435 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1436 self.assertGreater(len(datasets), 0) 

1437 uris = [exportButler.getURI(d) for d in datasets] 

1438 datastoreRoot = exportButler.datastore.root 

1439 

1440 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1441 

1442 for path in pathsInStore: 

1443 # Assume local file system 

1444 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1445 

1446 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1447 with safeTestTempDir(TESTDIR) as exportDir: 

1448 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1449 export.saveDatasets(datasets) 

1450 for path in pathsInStore: 

1451 self.assertTrue( 

1452 self.checkFileExists(exportDir, path), 

1453 f"Check that mode {transfer} exported files", 

1454 ) 

1455 
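Each transfer mode above produces the same relative layout under the export directory; only how the artifacts are materialized differs. A sketch, assuming an existing file-backed `butler` and a list of resolved `refs`:

```python
import tempfile

# "copy" duplicates the files, "link" hard-links them, and
# "symlink"/"relsymlink" create absolute/relative symbolic links.
for transfer in ("copy", "link", "symlink", "relsymlink"):
    with tempfile.TemporaryDirectory() as export_dir:
        with butler.export(directory=export_dir, format="yaml",
                           transfer=transfer) as export:
            export.saveDatasets(refs)
```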

1456 def testPruneDatasets(self): 

1457 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1458 butler = Butler(self.tmpConfigFile, writeable=True) 

1459 # Load registry data with dimensions to hang datasets off of. 

1460 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1461 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1462 # Add some RUN-type collections. 

1463 run1 = "run1" 

1464 butler.registry.registerRun(run1) 

1465 run2 = "run2" 

1466 butler.registry.registerRun(run2) 

1467 # put some datasets. ref1 and ref2 have the same data ID, and are in 

1468 # different runs. ref3 has a different data ID. 

1469 metric = makeExampleMetrics() 

1470 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1471 datasetType = self.addDatasetType( 

1472 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1473 ) 

1474 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1475 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1476 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1477 

1478 # Simple prune. 

1479 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1480 with self.assertRaises(LookupError): 

1481 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1482 

1483 # Put data back. 

1484 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1485 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1486 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1487 

1488 # Check that in normal mode, deleting the datastore record first

1489 # means that emptying the trash will not touch the file.

1490 uri1 = butler.datastore.getURI(ref1) 

1491 butler.datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1492 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1493 butler.datastore.trash(ref1) 

1494 butler.datastore.emptyTrash() 

1495 self.assertTrue(uri1.exists()) 

1496 uri1.remove() # Clean it up. 

1497 

1498 # Simulate execution butler setup by deleting the datastore 

1499 # record but keeping the file around and trusting. 

1500 butler.datastore.trustGetRequest = True 

1501 uri2 = butler.datastore.getURI(ref2) 

1502 uri3 = butler.datastore.getURI(ref3) 

1503 self.assertTrue(uri2.exists()) 

1504 self.assertTrue(uri3.exists()) 

1505 

1506 # Remove the datastore record. 

1507 butler.datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1508 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1509 self.assertTrue(uri2.exists()) 

1510 butler.datastore.trash([ref2, ref3]) 

1511 # Immediate removal for ref2 file 

1512 self.assertFalse(uri2.exists()) 

1513 # But ref3 has to wait for the empty. 

1514 self.assertTrue(uri3.exists()) 

1515 butler.datastore.emptyTrash() 

1516 self.assertFalse(uri3.exists()) 

1517 

1518 # Clear out the datasets from registry. 

1519 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1520 
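The trash interplay above is subtle: `trash` plus `emptyTrash` only removes artifacts the datastore still has records for, unless the datastore is trusting, in which case removal can happen immediately. The user-facing operation is simpler. A sketch, assuming a writeable `butler` and resolved refs:

```python
# purge=True with unstore=True removes the datasets completely:
# registry entries, datastore records, and file artifacts.
butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)

# The registry no longer knows the datasets, so even an existence
# check by dataset type and data ID raises LookupError.
try:
    butler.datasetExists(ref1.datasetType, ref1.dataId, collections="run1")
except LookupError:
    pass  # expected after a purge
```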

1521 def testPytypeCoercion(self): 

1522 """Test python type coercion on Butler.get and put.""" 

1523 

1524 # Store some data with the normal example storage class. 

1525 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1526 datasetTypeName = "test_metric" 

1527 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1528 

1529 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1530 metric = butler.get(datasetTypeName, dataId=dataId) 

1531 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1532 

1533 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1534 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1535 

1536 # Now need to hack the registry dataset type definition. 

1537 # There is no API for this. 

1538 manager = butler.registry._managers.datasets 

1539 manager._db.update( 

1540 manager._static.dataset_type, 

1541 {"name": datasetTypeName}, 

1542 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1543 ) 

1544 

1545 # Force reset of dataset type cache 

1546 butler.registry.refresh() 

1547 

1548 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1549 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1550 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1551 

1552 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1553 self.assertNotEqual(type(metric_model), type(metric)) 

1554 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1555 

1556 # Put the model and read it back to show that everything now 

1557 # works as normal. 

1558 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1559 metric_model_new = butler.get(metric_ref) 

1560 self.assertEqual(metric_model_new, metric_model) 

1561 

1562 # Hack the storage class again to something that will fail on the 

1563 # get with no conversion class. 

1564 manager._db.update( 

1565 manager._static.dataset_type, 

1566 {"name": datasetTypeName}, 

1567 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1568 ) 

1569 butler.registry.refresh() 

1570 

1571 with self.assertRaises(ValueError): 

1572 butler.get(datasetTypeName, dataId=dataId) 

1573 
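The observable behavior this test sets up (by hacking the registry, since there is no API for changing a dataset type's storage class) is that `Butler.get` coerces the stored object to the python type of the storage class currently registered for the dataset type. A sketch of the effect, assuming the hacked `butler` from above:

```python
from lsst.utils.introspection import get_full_type_name

# The file still holds a MetricsExample, but the registry now says
# the storage class is StructuredDataNoComponentsModel, so get()
# converts on read.
obj = butler.get("test_metric", dataId={"instrument": "DummyCamComp", "visit": 423})
print(get_full_type_name(obj))  # ...tests.MetricsExampleModel

# If no converter exists for the registered storage class
# (e.g. StructuredDataListYaml), get() raises ValueError instead.
```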

1574 

1575@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1576class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1577 """PosixDatastore specialization of a butler using Postgres""" 

1578 

1579 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1580 fullConfigKey = ".datastore.formatters" 

1581 validationCanFail = True 

1582 datastoreStr = ["/tmp"] 

1583 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1584 registryStr = "PostgreSQL@test" 

1585 

1586 @staticmethod 

1587 def _handler(postgresql): 

1588 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1589 with engine.begin() as connection: 

1590 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1591 

1592 @classmethod 

1593 def setUpClass(cls): 

1594 # Create the postgres test server. 

1595 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1596 cache_initialized_db=True, on_initialized=cls._handler 

1597 ) 

1598 super().setUpClass() 

1599 

1600 @classmethod 

1601 def tearDownClass(cls): 

1602 # Clean up any lingering SQLAlchemy engines/connections 

1603 # so they're closed before we shut down the server. 

1604 gc.collect() 

1605 cls.postgresql.clear_cache() 

1606 super().tearDownClass() 

1607 

1608 def setUp(self): 

1609 self.server = self.postgresql() 

1610 

1611 # Need to add a registry section to the config. 

1612 self._temp_config = False 

1613 config = Config(self.configFile) 

1614 config["registry", "db"] = self.server.url() 

1615 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1616 config.dump(fh) 

1617 self.configFile = fh.name 

1618 self._temp_config = True 

1619 super().setUp() 

1620 

1621 def tearDown(self): 

1622 self.server.stop() 

1623 if self._temp_config and os.path.exists(self.configFile): 

1624 os.remove(self.configFile) 

1625 super().tearDown() 

1626 

1627 def testMakeRepo(self): 

1628 # The base class test assumes that it's using sqlite and assumes 

1629 # the config file is acceptable to sqlite. 

1630 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1631 
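The `testing.postgresql` factory pattern used above is worth isolating: the `on_initialized` hook runs once against a template database (here installing the `btree_gist` extension the registry needs), and `cache_initialized_db=True` makes each per-test server a cheap clone of that template. A standalone sketch:

```python
import sqlalchemy
import testing.postgresql


def init_extensions(postgresql):
    # Runs once, when the cached template database is first created.
    engine = sqlalchemy.engine.create_engine(postgresql.url())
    with engine.begin() as connection:
        connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))


factory = testing.postgresql.PostgresqlFactory(
    cache_initialized_db=True, on_initialized=init_extensions
)
server = factory()     # per-test server, as in setUp()
print(server.url())    # connection URL to feed into the butler config
server.stop()          # per-test teardown
factory.clear_cache()  # class-level teardown
```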

1632 

1633class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1634 """InMemoryDatastore specialization of a butler""" 

1635 

1636 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1637 fullConfigKey = None 

1638 useTempRoot = False 

1639 validationCanFail = False 

1640 datastoreStr = ["datastore='InMemory"] 

1641 datastoreName = ["InMemoryDatastore@"] 

1642 registryStr = "/gen3.sqlite3" 

1643 

1644 def testIngest(self): 

1645 pass 

1646 

1647 

1648class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1649 """PosixDatastore specialization""" 

1650 

1651 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1652 fullConfigKey = ".datastore.datastores.1.formatters" 

1653 validationCanFail = True 

1654 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1655 datastoreName = [ 

1656 "InMemoryDatastore@", 

1657 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1658 "SecondDatastore", 

1659 ] 

1660 registryStr = "/gen3.sqlite3" 

1661 

1662 

1663class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1664 """Test that a yaml file in one location can refer to a root in another.""" 

1665 

1666 datastoreStr = ["dir1"] 

1667 # Disable the makeRepo test since we are deliberately not using 

1668 # butler.yaml as the config name. 

1669 fullConfigKey = None 

1670 

1671 def setUp(self): 

1672 self.root = makeTestTempDir(TESTDIR) 

1673 

1674 # Make a new repository in one place 

1675 self.dir1 = os.path.join(self.root, "dir1") 

1676 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1677 

1678 # Move the yaml file to a different place and add a "root" 

1679 self.dir2 = os.path.join(self.root, "dir2") 

1680 os.makedirs(self.dir2, exist_ok=True) 

1681 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1682 config = Config(configFile1) 

1683 config["root"] = self.dir1 

1684 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1685 config.dumpToUri(configFile2) 

1686 os.remove(configFile1) 

1687 self.tmpConfigFile = configFile2 

1688 

1689 def testFileLocations(self): 

1690 self.assertNotEqual(self.dir1, self.dir2) 

1691 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1692 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1693 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1694 
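The relocation trick tested here is: write a config in one directory whose `root` key points at the repository in another. A condensed sketch of the `setUp` above (placeholders `dir1`, `dir2`):

```python
import os

from lsst.daf.butler import Butler, Config

# Make the repository in dir1, then move its config to dir2 with an
# explicit "root" pointing back at dir1.
Butler.makeRepo(dir1)
config = Config(os.path.join(dir1, "butler.yaml"))
config["root"] = dir1
config.dumpToUri(os.path.join(dir2, "butler2.yaml"))
os.remove(os.path.join(dir1, "butler.yaml"))

# The relocated config still finds the registry and datastore in dir1.
butler = Butler(os.path.join(dir2, "butler2.yaml"), writeable=False)
```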

1695 

1696class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1697 """Test that a config file created by makeRepo outside of repo works.""" 

1698 

1699 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1700 

1701 def setUp(self): 

1702 self.root = makeTestTempDir(TESTDIR) 

1703 self.root2 = makeTestTempDir(TESTDIR) 

1704 

1705 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1706 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1707 

1708 def tearDown(self): 

1709 if os.path.exists(self.root2): 

1710 shutil.rmtree(self.root2, ignore_errors=True) 

1711 super().tearDown() 

1712 

1713 def testConfigExistence(self): 

1714 c = Config(self.tmpConfigFile) 

1715 uri_config = ResourcePath(c["root"]) 

1716 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1717 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1718 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1719 

1720 def testPutGet(self): 

1721 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1722 self.runPutGetTest(storageClass, "test_metric") 

1723 

1724 

1725class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1726 """Test that a config file created by makeRepo outside of repo works.""" 

1727 

1728 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1729 

1730 def setUp(self): 

1731 self.root = makeTestTempDir(TESTDIR) 

1732 self.root2 = makeTestTempDir(TESTDIR) 

1733 

1734 self.tmpConfigFile = self.root2 

1735 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1736 

1737 def testConfigExistence(self): 

1738 # Append the yaml file else Config constructor does not know the file 

1739 # type. 

1740 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1741 super().testConfigExistence() 

1742 

1743 

1744class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1745 """Test that a config file created by makeRepo outside of repo works.""" 

1746 

1747 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1748 

1749 def setUp(self): 

1750 self.root = makeTestTempDir(TESTDIR) 

1751 self.root2 = makeTestTempDir(TESTDIR) 

1752 

1753 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1754 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1755 

1756 

1757@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1758class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1759 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1760 a local in-memory SqlRegistry. 

1761 """ 

1762 

1763 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1764 fullConfigKey = None 

1765 validationCanFail = True 

1766 

1767 bucketName = "anybucketname" 

1768 """Name of the Bucket that will be used in the tests. The name is read from 

1769 the config file used with the tests during set-up. 

1770 """ 

1771 

1772 root = "butlerRoot/" 

1773 """Root repository directory expected to be used in case useTempRoot=False. 

1774 Otherwise the root is set to a randomly generated 20-character

1775 string during set-up.

1776 """ 

1777 

1778 datastoreStr = [f"datastore={root}"] 

1779 """Contains all expected root locations in a format expected to be 

1780 returned by Butler stringification. 

1781 """ 

1782 

1783 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1784 """The expected format of the S3 Datastore string.""" 

1785 

1786 registryStr = "/gen3.sqlite3" 

1787 """Expected format of the Registry string.""" 

1788 

1789 mock_s3 = mock_s3() 

1790 """The mocked s3 interface from moto.""" 

1791 

1792 def genRoot(self): 

1793 """Returns a random string of len 20 to serve as a root 

1794 name for the temporary bucket repo. 

1795 

1796 This is equivalent to tempfile.mkdtemp as this is what self.root 

1797 becomes when useTempRoot is True. 

1798 """ 

1799 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1800 return rndstr + "/" 

1801 

1802 def setUp(self): 

1803 config = Config(self.configFile) 

1804 uri = ResourcePath(config[".datastore.datastore.root"]) 

1805 self.bucketName = uri.netloc 

1806 

1807 # Enable S3 mocking of tests. 

1808 self.mock_s3.start() 

1809 

1810 # set up some fake credentials if they do not exist 

1811 self.usingDummyCredentials = setAwsEnvCredentials() 

1812 

1813 if self.useTempRoot: 

1814 self.root = self.genRoot() 

1815 rooturi = f"s3://{self.bucketName}/{self.root}" 

1816 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1817 

1818 # need local folder to store registry database 

1819 self.reg_dir = makeTestTempDir(TESTDIR) 

1820 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1821 

1822 # Moto needs to know that we expect this bucket to exist

1823 # (the name used to be the class attribute bucketName).

1824 s3 = boto3.resource("s3") 

1825 s3.create_bucket(Bucket=self.bucketName) 

1826 

1827 self.datastoreStr = f"datastore={self.root}" 

1828 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1829 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1830 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1831 

1832 def tearDown(self): 

1833 s3 = boto3.resource("s3") 

1834 bucket = s3.Bucket(self.bucketName) 

1835 try: 

1836 bucket.objects.all().delete() 

1837 except botocore.exceptions.ClientError as e: 

1838 if e.response["Error"]["Code"] == "404": 

1839 # the key was not reachable - pass 

1840 pass 

1841 else: 

1842 raise 

1843 

1844 bucket = s3.Bucket(self.bucketName) 

1845 bucket.delete() 

1846 

1847 # Stop the S3 mock. 

1848 self.mock_s3.stop() 

1849 

1850 # unset any potentially set dummy credentials 

1851 if self.usingDummyCredentials: 

1852 unsetAwsEnvCredentials() 

1853 

1854 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1855 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1856 

1857 if self.useTempRoot and os.path.exists(self.root): 

1858 shutil.rmtree(self.root, ignore_errors=True) 

1859 

1860 super().tearDown() 

1861 
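Outside of the test scaffolding, the moto setup above boils down to a short pattern: start the mock before any boto3 client is created, make sure some (possibly fake) credentials exist, and create the bucket explicitly, because moto only knows about buckets you create. A sketch:

```python
import boto3
from moto import mock_s3

from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials

mock = mock_s3()
mock.start()                          # all boto3 calls now hit moto
using_dummy = setAwsEnvCredentials()  # fake credentials if none are set

boto3.resource("s3").create_bucket(Bucket="anybucketname")
# ... exercise s3://anybucketname/... URIs through the butler ...

boto3.resource("s3").Bucket("anybucketname").objects.all().delete()
mock.stop()
if using_dummy:
    unsetAwsEnvCredentials()
```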

1862 

1863class PosixDatastoreTransfers(unittest.TestCase): 

1864 """Test data transfers between butlers. 

1865 

1866 Different dataset ID managers are exercised: UUID to UUID and

1867 integer to integer are tested. UUID to integer is not supported,

1868 since we do not currently want to allow that. Integer to UUID is

1869 supported, with the caveat that a UUID4 will be generated, which

1870 is incorrect for raw dataset types; the test ignores that.

1871 """ 

1872 

1873 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1874 

1875 @classmethod 

1876 def setUpClass(cls): 

1877 cls.storageClassFactory = StorageClassFactory() 

1878 cls.storageClassFactory.addFromConfig(cls.configFile) 

1879 

1880 def setUp(self): 

1881 self.root = makeTestTempDir(TESTDIR) 

1882 self.config = Config(self.configFile) 

1883 

1884 def tearDown(self): 

1885 removeTestTempDir(self.root) 

1886 

1887 def create_butler(self, manager, label): 

1888 config = Config(self.configFile) 

1889 config["registry", "managers", "datasets"] = manager 

1890 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

1891 

1892 def create_butlers(self, manager1, manager2): 

1893 self.source_butler = self.create_butler(manager1, "1") 

1894 self.target_butler = self.create_butler(manager2, "2") 

1895 

1896 def testTransferUuidToUuid(self): 

1897 self.create_butlers( 

1898 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1899 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1900 ) 

1901 # Setting id_gen_map should have no effect here 

1902 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1903 

1904 def testTransferMissing(self): 

1905 """Test transfers where datastore records are missing. 

1906 

1907 This is how execution butler works. 

1908 """ 

1909 self.create_butlers( 

1910 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1911 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1912 ) 

1913 

1914 # Configure the source butler to allow trust. 

1915 self.source_butler.datastore.trustGetRequest = True 

1916 

1917 self.assertButlerTransfers(purge=True) 

1918 

1919 def testTransferMissingDisassembly(self): 

1920 """Test transfers where datastore records are missing. 

1921 

1922 This is how execution butler works. 

1923 """ 

1924 self.create_butlers( 

1925 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1926 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1927 ) 

1928 

1929 # Configure the source butler to allow trust. 

1930 self.source_butler.datastore.trustGetRequest = True 

1931 

1932 # Test disassembly. 

1933 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

1934 

1935 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

1936 """Test that a run can be transferred to another butler.""" 

1937 

1938 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1939 datasetTypeName = "random_data" 

1940 

1941 # Test will create 3 collections and we will want to transfer 

1942 # two of those three. 

1943 runs = ["run1", "run2", "other"] 

1944 

1945 # Also want to use two different dataset types to ensure that 

1946 # grouping works. 

1947 datasetTypeNames = ["random_data", "random_data_2"] 

1948 

1949 # Create the run collections in the source butler. 

1950 for run in runs: 

1951 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1952 

1953 # Create dimensions in source butler. 

1954 n_exposures = 30 

1955 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1956 self.source_butler.registry.insertDimensionData( 

1957 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1958 ) 

1959 self.source_butler.registry.insertDimensionData( 

1960 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

1961 ) 

1962 

1963 for i in range(n_exposures): 

1964 self.source_butler.registry.insertDimensionData( 

1965 "exposure", 

1966 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

1967 ) 

1968 

1969 # Create dataset types in the source butler. 

1970 dimensions = self.source_butler.registry.dimensions.extract(["instrument", "exposure"]) 

1971 for datasetTypeName in datasetTypeNames: 

1972 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

1973 self.source_butler.registry.registerDatasetType(datasetType) 

1974 

1975 # Write a dataset to an unrelated run -- this will ensure that 

1976 # we are rewriting integer dataset ids in the target if necessary. 

1977 # Will not be relevant for UUID. 

1978 run = "distraction" 

1979 butler = Butler(butler=self.source_butler, run=run) 

1980 butler.put( 

1981 makeExampleMetrics(), 

1982 datasetTypeName, 

1983 exposure=1, 

1984 instrument="DummyCamComp", 

1985 physical_filter="d-r", 

1986 ) 

1987 

1988 # Write some example metrics to the source 

1989 butler = Butler(butler=self.source_butler) 

1990 

1991 # Set of DatasetRefs that should be in the list of refs to transfer 

1992 # but which will not be transferred. 

1993 deleted = set() 

1994 

1995 n_expected = 20 # Number of datasets expected to be transferred 

1996 source_refs = [] 

1997 for i in range(n_exposures): 

1998 # Put a third of datasets into each collection, only retain 

1999 # two thirds. 

2000 index = i % 3 

2001 run = runs[index] 

2002 datasetTypeName = datasetTypeNames[i % 2] 

2003 

2004 metric_data = { 

2005 "summary": {"counter": i}, 

2006 "output": {"text": "metric"}, 

2007 "data": [2 * x for x in range(i)], 

2008 } 

2009 metric = MetricsExample(**metric_data) 

2010 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2011 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2012 

2013 # Remove the datastore record using low-level API 

2014 if purge: 

2015 # Remove records for a fraction. 

2016 if index == 1: 

2017 # For one of these delete the file as well. 

2018 # This allows the "missing" code to filter the 

2019 # file out. 

2020 if not deleted: 

2021 primary, uris = butler.datastore.getURIs(ref) 

2022 if primary: 

2023 primary.remove() 

2024 for uri in uris.values(): 

2025 uri.remove() 

2026 n_expected -= 1 

2027 deleted.add(ref) 

2028 

2029 # Remove the datastore record. 

2030 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

2031 

2032 if index < 2: 

2033 source_refs.append(ref) 

2034 if ref not in deleted: 

2035 new_metric = butler.get(ref.unresolved(), collections=run) 

2036 self.assertEqual(new_metric, metric) 

2037 

2038 # Create some bad dataset types to ensure we check for inconsistent 

2039 # definitions. 

2040 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2041 for datasetTypeName in datasetTypeNames: 

2042 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2043 self.target_butler.registry.registerDatasetType(datasetType) 

2044 with self.assertRaises(ConflictingDefinitionError) as cm: 

2045 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2046 self.assertIn("dataset type differs", str(cm.exception)) 

2047 

2048 # And remove the bad definitions. 

2049 for datasetTypeName in datasetTypeNames: 

2050 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2051 

2052 # Transfer without creating dataset types should fail. 

2053 with self.assertRaises(KeyError): 

2054 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2055 

2056 # Transfer without creating dimensions should fail. 

2057 with self.assertRaises(ConflictingDefinitionError) as cm: 

2058 self.target_butler.transfer_from( 

2059 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

2060 ) 

2061 self.assertIn("dimension", str(cm.exception)) 

2062 

2063 # The failed transfer above leaves registry in an inconsistent 

2064 # state because the run is created but then rolled back without 

2065 # the collection cache being cleared. For now force a refresh. 

2066 # Can remove with DM-35498. 

2067 self.target_butler.registry.refresh() 

2068 

2069 # Now transfer them to the second butler, including dimensions. 

2070 with self.assertLogs(level=logging.DEBUG) as cm: 

2071 transferred = self.target_butler.transfer_from( 

2072 self.source_butler, 

2073 source_refs, 

2074 id_gen_map=id_gen_map, 

2075 register_dataset_types=True, 

2076 transfer_dimensions=True, 

2077 ) 

2078 self.assertEqual(len(transferred), n_expected) 

2079 log_output = ";".join(cm.output) 

2080 self.assertIn("found in datastore for chunk", log_output) 

2081 self.assertIn("Creating output run", log_output) 

2082 

2083 # Do the transfer twice to ensure that it will do nothing extra. 

2084 # Only do this if purge=True because it does not work for int 

2085 # dataset_id. 

2086 if purge: 

2087 # This should not need to register dataset types. 

2088 transferred = self.target_butler.transfer_from( 

2089 self.source_butler, source_refs, id_gen_map=id_gen_map 

2090 ) 

2091 self.assertEqual(len(transferred), n_expected) 

2092 

2093 # Also do an explicit low-level transfer to trigger some 

2094 # edge cases. 

2095 with self.assertLogs(level=logging.DEBUG) as cm: 

2096 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2097 log_output = ";".join(cm.output) 

2098 self.assertIn("no file artifacts exist", log_output) 

2099 

2100 with self.assertRaises(TypeError): 

2101 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2102 

2103 with self.assertRaises(ValueError): 

2104 self.target_butler.datastore.transfer_from( 

2105 self.source_butler.datastore, source_refs, transfer="split" 

2106 ) 

2107 

2108 # Now try to get the same refs from the new butler. 

2109 for ref in source_refs: 

2110 if ref not in deleted: 

2111 unresolved_ref = ref.unresolved() 

2112 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2113 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2114 self.assertEqual(new_metric, old_metric) 

2115 

2116 # Now prune run2 collection and create instead a CHAINED collection. 

2117 # This should block the transfer. 

2118 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

2119 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2120 with self.assertRaises(CollectionTypeError): 

2121 # Re-importing the run1 datasets can be problematic if they 

2122 # use integer IDs so filter those out. 

2123 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2124 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 

2125 
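Stripped of the failure-path assertions, the transfer tested above is a single call. A sketch, assuming a `source_butler`, a writeable `target_butler`, and resolved `source_refs` whose file artifacts all exist:

```python
# register_dataset_types creates missing dataset type definitions in
# the target; transfer_dimensions copies the dimension records the
# refs depend on. Conflicting definitions raise
# ConflictingDefinitionError instead of being overwritten.
transferred = target_butler.transfer_from(
    source_butler,
    source_refs,
    register_dataset_types=True,
    transfer_dimensions=True,
)
assert len(transferred) == len(source_refs)
```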

2126 

2127if __name__ == "__main__": 2127 ↛ 2128 (line 2127 didn't jump to line 2128, because the condition on line 2127 was never true)

2128 unittest.main()