Coverage for tests/test_butler.py: 13% (1173 statements; coverage.py v7.2.7, created at 2023-06-06 02:34 -0700)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

from __future__ import annotations

import gc
import json
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock
import uuid
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, cast

try:
    import boto3
    import botocore
    from moto import mock_s3  # type: ignore[import]
except ImportError:
    boto3 = None


    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 cannot be imported."""
        return cls


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    from lsst.daf.butler import Datastore, DimensionGraph, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
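    """Return a simple MetricsExample with fixed summary, output, and data
    values for use by the tests.
    """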

    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in any other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests(TestCaseMixin):
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root: str
    default_run = "ingésτ😺"
    storageClassFactory: StorageClassFactory
    configFile: str
    tmpConfigFile: str

    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGraph, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None) -> None:
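        """Check that every named component of the dataset can be retrieved,
        both directly and via a deferred handle, and matches the
        corresponding attribute of the reference object.
        """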

        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[Butler, DatasetType]:
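        """Create a butler for the given run, register a dataset type with
        instrument+visit dimensions, and insert the dimension records
        (including visits 423, 424, and 425) used by the put/get tests.
        """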

        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Butler:
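        """Exercise put/get round trips, component retrieval, artifact
        retrieval, and the expected failure modes for the given storage
        class and dataset type, returning the populated butler so that
        callers can run further checks.
        """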

        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType.

        # Keep track of any collections we add and do not clean up.
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]
        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test get with a resolved ref.
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get by dataset type name and dataId.
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef.
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId.
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef.
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # And getDeferred directly with a resolved ref.
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                # Can the artifacts themselves be retrieved?
                if not butler.datastore.isEphemeral:
                    root_uri = ResourcePath(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)
                            assert path_in_destination is not None

                            # When the path is not preserved there should not
                            # be any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # get() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Create DatasetRef for put using the default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises(FileNotFoundError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed.
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add the same ref again, so we can check that a duplicate put fails.
        ref = butler.put(metric, datasetType, dataId)

        # A repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with a resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # A repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # In the case of a resolved ref this write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present.

        return butler

    def testDeferredCollectionPassing(self) -> None:
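        """Test that a butler constructed without a run or collection can
        still put and get datasets when collections are passed explicitly
        per call.
        """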

        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # The second time it will be allowed but indicate a no-op.
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True
    validationCanFail: bool
    fullConfigKey: str | None
    registryStr: str | None
    datastoreName: list[str] | None
    datastoreStr: list[str]

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self) -> None:
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(Butler.get_repo_uri("missing", True), ResourcePath("missing"))
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label"))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self) -> None:
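        """Test a composite storage class that is configured not to be
        disassembled, so a single artifact should be written on put.
        """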

        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self) -> None:
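        """Test a composite storage class that file datastores disassemble,
        so one artifact per component should be written (in-memory
        datastores never disassemble).
        """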

        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # A parameter on the write storage class should work regardless
        # of the read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""

        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the compatible but different dataset type
        # definition, this time passing the dataset type and dataId rather
        # than a ref. This should be consistent with the ref-based get()
        # behavior and return the Python type of the given DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self) -> None:
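        """Test file ingest, including multi-dataset single-file ingest and
        re-ingest of known datasets in "execution butler" mode.
        """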

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

850 

851 # For first ingest use copy. 

852 butler.ingest(*datasets, transfer="copy", record_validation_info=False) 

853 

854 # Now try to ingest again in "execution butler" mode where 

855 # the registry entries exist but the datastore does not have 

856 # the files. We also need to strip the dimension records to ensure 

857 # that they will be re-added by the ingest. 

858 ref = datasets[0].refs[0] 

859 datasets[0].refs = [ 

860 cast( 

861 DatasetRef, 

862 butler.registry.findDataset(ref.datasetType, dataId=ref.dataId, collections=ref.run), 

863 ) 

864 for ref in datasets[0].refs 

865 ] 

866 all_refs = [] 

867 for dataset in datasets: 

868 refs = [] 

869 for ref in dataset.refs: 

870 # Create a dict from the dataId to drop the records. 

871 new_data_id = {str(k): v for k, v in ref.dataId.items()} 

872 new_ref = butler.registry.findDataset(ref.datasetType, new_data_id, collections=ref.run) 

873 assert new_ref is not None 

874 self.assertFalse(new_ref.dataId.hasRecords()) 

875 refs.append(new_ref) 

876 dataset.refs = refs 

877 all_refs.extend(dataset.refs) 

878 butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False) 

879 

880 # Use move mode to test that the file is deleted. Also 

881 # disable recording of file size. 

882 butler.ingest(*datasets, transfer="move", record_validation_info=False) 

883 

884 # Check that every ref now has records. 

885 for dataset in datasets: 

886 for ref in dataset.refs: 

887 self.assertTrue(ref.dataId.hasRecords()) 

888 

889 # Ensure that the file has disappeared. 

890 self.assertFalse(tempFile.exists()) 

891 

892 # Check that the datastore recorded no file size. 

893 # Not all datastores can support this. 

894 try: 

895 infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0]) # type: ignore[attr-defined] 

896 self.assertEqual(infos[0].file_size, -1) 

897 except AttributeError: 

898 pass 

899 

900 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424} 

901 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424} 

902 

903 multi1 = butler.get(datasetTypeName, dataId1) 

904 multi2 = butler.get(datasetTypeName, dataId2) 

905 

906 self.assertEqual(multi1, metrics1) 

907 self.assertEqual(multi2, metrics2) 

908 

909 # Compare URIs 

910 uri1 = butler.getURI(datasetTypeName, dataId1) 

911 uri2 = butler.getURI(datasetTypeName, dataId2) 

912 self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}") 

913 

914 # Test that removing one does not break the second 

915 # This line will issue a warning log message for a ChainedDatastore 

916 # that uses an InMemoryDatastore since in-memory can not ingest 

917 # files. 

918 butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False) 

919 self.assertFalse(butler.datasetExists(datasetTypeName, dataId1)) 

920 self.assertTrue(butler.datasetExists(datasetTypeName, dataId2)) 

921 multi2b = butler.get(datasetTypeName, dataId2) 

922 self.assertEqual(multi2, multi2b) 

923 

924 # Ensure we can ingest 0 datasets 

925 datasets = [] 

926 butler.ingest(*datasets) 

927 

    def testPickle(self) -> None:
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self) -> None:
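        """Test queryDatasetTypes, including component expansion, and then
        validate the configuration against the registered dataset types.
        """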

        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry, entries are not
        # created for components, but querying them can return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self) -> None:
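        """Test that an exception raised inside a butler transaction rolls
        back both the dimension inserts and the dataset put.
        """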

        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get with a resolved ref.
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)

    def testMakeRepo(self) -> None:
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root.
        if self.fullConfigKey is None:
            return

        # Create two separate directories.
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo.
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self) -> None:
        """Test that dataIds can be rewritten based on dimension records."""

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId.
            self.assertEqual(ref.dataId["exposure"], i)

            # And check that we can get the dataset back with the same dataId.
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool:
        """Check whether a file exists at the given path (relative to root).

        The testPutTemplates test verifies the actual physical existence of
        the files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self) -> None:
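        """Test that file templates are applied as expected on put and that
        templates that cannot produce unique filenames are rejected.
        """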

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass: StorageClass) -> None:
        """Test an export to a temp directory and an import back into a new
        temp directory repo. It does not assume a POSIX datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")

        # Test that we must have a file extension.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump", directory=".") as export:
                pass

        # Test that an unknown format is not allowed.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump.fits", directory=".") as export:
                pass

        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler.
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self) -> None:
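        """Test removeRuns with and without unstore, and removal of the
        dataset type once its runs are gone.
        """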

1326 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1327 butler = Butler(self.tmpConfigFile, writeable=True) 

1328 # Load registry data with dimensions to hang datasets off of. 

1329 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1330 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1331 # Add some RUN-type collections. 

1332 run1 = "run1" 

1333 butler.registry.registerRun(run1) 

1334 run2 = "run2" 

1335 butler.registry.registerRun(run2) 

1336 # Put a dataset in each run. 

1337 metric = makeExampleMetrics() 

1338 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1339 datasetType = self.addDatasetType( 

1340 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1341 ) 

1342 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1343 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1344 uri1 = butler.getURI(ref1, collections=[run1]) 

1345 uri2 = butler.getURI(ref2, collections=[run2]) 

1346 

1347 with self.assertRaises(OrphanedRecordError): 

1348 butler.registry.removeDatasetType(datasetType.name) 

1349 

1350 # Remove from both runs with different values for unstore. 

1351 butler.removeRuns([run1], unstore=True) 

1352 butler.removeRuns([run2], unstore=False) 

1353 # Should be nothing in registry for either one, and datastore should 

1354 # not think either exists. 

1355 with self.assertRaises(MissingCollectionError): 

1356 butler.registry.getCollectionType(run1) 

1357 with self.assertRaises(MissingCollectionError): 

1358 butler.registry.getCollectionType(run2) 

1359 self.assertFalse(butler.datastore.exists(ref1)) 

1360 self.assertFalse(butler.datastore.exists(ref2)) 

1361 # The ref we unstored should be gone according to the URI, but the 

1362 # one we forgot should still be around. 

1363 self.assertFalse(uri1.exists()) 

1364 self.assertTrue(uri2.exists()) 

1365 

1366 # Now that the collections have been pruned, we can remove the 

1367 # dataset type. 

1368 butler.registry.removeDatasetType(datasetType.name) 
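
# Wildcard patterns that match no dataset types should be reported
# via an INFO log message rather than an exception.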

1369 

1370 with self.assertLogs("lsst.daf.butler.registries", "INFO") as cm: 

1371 butler.registry.removeDatasetType(("test*", "test*")) 

1372 self.assertIn("not defined", "\n".join(cm.output)) 

1373 

1374 

1375class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1376 """PosixDatastore specialization of a butler""" 

1377 

1378 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1379 fullConfigKey: str | None = ".datastore.formatters" 

1380 validationCanFail = True 

1381 datastoreStr = ["/tmp"] 

1382 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1383 registryStr = "/gen3.sqlite3" 

1384 

1385 def testPathConstructor(self) -> None: 

1386 """Independent test of constructor using PathLike.""" 

1387 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1388 self.assertIsInstance(butler, Butler) 

1389 

1390 # And again with a Path object with the butler yaml 

1391 path = pathlib.Path(self.tmpConfigFile) 

1392 butler = Butler(path, writeable=False) 

1393 self.assertIsInstance(butler, Butler) 

1394 

1395 # And again with a Path object without the butler yaml 

1396 # (making sure we skip it if the tmp config doesn't end 

1397 # in butler.yaml -- which is the case for a subclass) 

1398 if self.tmpConfigFile.endswith("butler.yaml"): 

1399 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1400 butler = Butler(path, writeable=False) 

1401 self.assertIsInstance(butler, Butler) 

1402 

1403 def testExportTransferCopy(self) -> None: 

1404 """Test local export using all transfer modes""" 

1405 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1406 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1407 # Test that the repo actually has at least one dataset. 

1408 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1409 self.assertGreater(len(datasets), 0) 

1410 uris = [exportButler.getURI(d) for d in datasets] 

1411 assert isinstance(exportButler.datastore, FileDatastore) 

1412 datastoreRoot = exportButler.datastore.root 

1413 

1414 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1415 

1416 for path in pathsInStore: 

1417 # Assume local file system 

1418 assert path is not None 

1419 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1420 

1421 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1422 with safeTestTempDir(TESTDIR) as exportDir: 

1423 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1424 export.saveDatasets(datasets) 

1425 for path in pathsInStore: 

1426 assert path is not None 

1427 self.assertTrue( 

1428 self.checkFileExists(exportDir, path), 

1429 f"Check that mode {transfer} exported files", 

1430 ) 

1431 

1432 def testPruneDatasets(self) -> None: 

1433 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1434 butler = Butler(self.tmpConfigFile, writeable=True) 

1435 assert isinstance(butler.datastore, FileDatastore) 

1436 # Load registry data with dimensions to hang datasets off of. 

1437 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1438 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1439 # Add some RUN-type collections. 

1440 run1 = "run1" 

1441 butler.registry.registerRun(run1) 

1442 run2 = "run2" 

1443 butler.registry.registerRun(run2) 

1444 # Put some datasets. ref1 and ref2 have the same data ID, and are in 

1445 # different runs. ref3 has a different data ID. 

1446 metric = makeExampleMetrics() 

1447 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1448 datasetType = self.addDatasetType( 

1449 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1450 ) 

1451 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1452 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1453 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1454 

1455 # Simple prune. 

1456 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1457 with self.assertRaises(LookupError): 

1458 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1459 

1460 # Put data back. 

1461 ref1 = butler.put(metric, ref1, run=run1) 

1462 ref2 = butler.put(metric, ref2, run=run2) 

1463 ref3 = butler.put(metric, ref3, run=run1) 

1464 

1465 # Check that in normal mode, deleting the record first means that 

1466 # emptying the trash will not touch the file. 

1467 uri1 = butler.datastore.getURI(ref1) 

1468 butler.datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1469 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1470 butler.datastore.trash(ref1) 

1471 butler.datastore.emptyTrash() 

1472 self.assertTrue(uri1.exists()) 

1473 uri1.remove() # Clean it up. 

1474 

1475 # Simulate execution butler setup by deleting the datastore 

1476 # record but keeping the file around and trusting. 

1477 butler.datastore.trustGetRequest = True 

1478 uri2 = butler.datastore.getURI(ref2) 

1479 uri3 = butler.datastore.getURI(ref3) 

1480 self.assertTrue(uri2.exists()) 

1481 self.assertTrue(uri3.exists()) 

1482 

1483 # Remove the datastore record. 

1484 butler.datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1485 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1486 self.assertTrue(uri2.exists()) 

1487 butler.datastore.trash([ref2, ref3]) 

1488 # Immediate removal of the ref2 file. 

1489 self.assertFalse(uri2.exists()) 

1490 # But ref3 has to wait for the trash to be emptied. 

1491 self.assertTrue(uri3.exists()) 

1492 butler.datastore.emptyTrash() 

1493 self.assertFalse(uri3.exists()) 

1494 

1495 # Clear out the datasets from registry. 

1496 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1497 

1498 def testPytypeCoercion(self) -> None: 

1499 """Test python type coercion on Butler.get and put.""" 

1500 

1501 # Store some data with the normal example storage class. 

1502 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1503 datasetTypeName = "test_metric" 

1504 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1505 

1506 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1507 metric = butler.get(datasetTypeName, dataId=dataId) 

1508 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1509 

1510 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1511 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1512 

1513 # Now need to hack the registry dataset type definition. 

1514 # There is no API for this. 

1515 assert isinstance(butler.registry, SqlRegistry) 

1516 manager = butler.registry._managers.datasets 

1517 assert hasattr(manager, "_db") and hasattr(manager, "_static") 
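
# (Note: the second argument to Database.update maps column names to
# the keys in the row dicts that hold the values to match, which is
# why the row dict below is keyed by the dataset type name itself
# rather than by the literal string "name".)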

1518 manager._db.update( 

1519 manager._static.dataset_type, 

1520 {"name": datasetTypeName}, 

1521 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1522 ) 

1523 

1524 # Force a reset of the dataset type cache. 

1525 butler.registry.refresh() 

1526 

1527 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1528 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1529 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1530 

1531 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1532 self.assertNotEqual(type(metric_model), type(metric)) 

1533 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1534 

1535 # Put the model and read it back to show that everything now 

1536 # works as normal. 

1537 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1538 metric_model_new = butler.get(metric_ref) 

1539 self.assertEqual(metric_model_new, metric_model) 

1540 

1541 # Hack the storage class again to something that will fail on the 

1542 # get because there is no conversion class. 

1543 manager._db.update( 

1544 manager._static.dataset_type, 

1545 {"name": datasetTypeName}, 

1546 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1547 ) 

1548 butler.registry.refresh() 

1549 

1550 with self.assertRaises(ValueError): 

1551 butler.get(datasetTypeName, dataId=dataId) 

1552 

1553 

1554@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1555class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1556 """PosixDatastore specialization of a butler using Postgres""" 

1557 

1558 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1559 fullConfigKey = ".datastore.formatters" 

1560 validationCanFail = True 

1561 datastoreStr = ["/tmp"] 

1562 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1563 registryStr = "PostgreSQL@test" 

1564 postgresql: Any 

1565 

1566 @staticmethod 

1567 def _handler(postgresql: Any) -> None: 

1568 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1569 with engine.begin() as connection: 
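# (The registry's PostgreSQL schema uses exclusion constraints that
# combine plain columns with timespan ranges in a single GiST index,
# which requires the btree_gist extension to be installed.)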

1570 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1571 

1572 @classmethod 

1573 def setUpClass(cls) -> None: 

1574 # Create the postgres test server. 

1575 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1576 cache_initialized_db=True, on_initialized=cls._handler 

1577 ) 

1578 super().setUpClass() 

1579 

1580 @classmethod 

1581 def tearDownClass(cls) -> None: 

1582 # Clean up any lingering SQLAlchemy engines/connections 

1583 # so they're closed before we shut down the server. 

1584 gc.collect() 

1585 cls.postgresql.clear_cache() 

1586 super().tearDownClass() 

1587 

1588 def setUp(self) -> None: 

1589 self.server = self.postgresql() 

1590 

1591 # Need to add a registry section to the config. 

1592 self._temp_config = False 

1593 config = Config(self.configFile) 

1594 config["registry", "db"] = self.server.url() 

1595 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1596 config.dump(fh) 

1597 self.configFile = fh.name 

1598 self._temp_config = True 

1599 super().setUp() 

1600 

1601 def tearDown(self) -> None: 

1602 self.server.stop() 

1603 if self._temp_config and os.path.exists(self.configFile): 

1604 os.remove(self.configFile) 

1605 super().tearDown() 

1606 

1607 def testMakeRepo(self) -> None: 

1608 # The base class test assumes that it is using SQLite and that 

1609 # the config file is acceptable to SQLite. 

1610 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1611 

1612 

1613class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1614 """InMemoryDatastore specialization of a butler""" 

1615 

1616 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1617 fullConfigKey = None 

1618 useTempRoot = False 

1619 validationCanFail = False 

1620 datastoreStr = ["datastore='InMemory"] 

1621 datastoreName = ["InMemoryDatastore@"] 

1622 registryStr = "/gen3.sqlite3" 

1623 

1624 def testIngest(self) -> None: 
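# Ingest of external files does not apply to an in-memory datastore,
# so the inherited test is deliberately a no-op here.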

1625 pass 

1626 

1627 

1628class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1629 """PosixDatastore specialization""" 

1630 

1631 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1632 fullConfigKey = ".datastore.datastores.1.formatters" 

1633 validationCanFail = True 

1634 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1635 datastoreName = [ 

1636 "InMemoryDatastore@", 

1637 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1638 "SecondDatastore", 

1639 ] 

1640 registryStr = "/gen3.sqlite3" 

1641 

1642 

1643class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1644 """Test that a yaml file in one location can refer to a root in another.""" 

1645 

1646 datastoreStr = ["dir1"] 

1647 # Disable the makeRepo test since we are deliberately not using 

1648 # butler.yaml as the config name. 

1649 fullConfigKey = None 

1650 

1651 def setUp(self) -> None: 

1652 self.root = makeTestTempDir(TESTDIR) 

1653 

1654 # Make a new repository in one place 

1655 self.dir1 = os.path.join(self.root, "dir1") 

1656 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1657 

1658 # Move the yaml file to a different place and add a "root" 

1659 self.dir2 = os.path.join(self.root, "dir2") 

1660 os.makedirs(self.dir2, exist_ok=True) 

1661 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1662 config = Config(configFile1) 

1663 config["root"] = self.dir1 

1664 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1665 config.dumpToUri(configFile2) 

1666 os.remove(configFile1) 

1667 self.tmpConfigFile = configFile2 

1668 

1669 def testFileLocations(self) -> None: 

1670 self.assertNotEqual(self.dir1, self.dir2) 

1671 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1672 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1673 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1674 

1675 

1676class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1677 """Test that a config file created by makeRepo outside of repo works.""" 

1678 

1679 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1680 

1681 def setUp(self) -> None: 

1682 self.root = makeTestTempDir(TESTDIR) 

1683 self.root2 = makeTestTempDir(TESTDIR) 

1684 

1685 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1686 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1687 

1688 def tearDown(self) -> None: 

1689 if os.path.exists(self.root2): 

1690 shutil.rmtree(self.root2, ignore_errors=True) 

1691 super().tearDown() 

1692 

1693 def testConfigExistence(self) -> None: 

1694 c = Config(self.tmpConfigFile) 

1695 uri_config = ResourcePath(c["root"]) 

1696 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1697 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1698 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1699 

1700 def testPutGet(self) -> None: 

1701 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1702 self.runPutGetTest(storageClass, "test_metric") 

1703 

1704 

1705class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1706 """Test that a config file created by makeRepo outside of repo works.""" 

1707 

1708 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1709 

1710 def setUp(self) -> None: 

1711 self.root = makeTestTempDir(TESTDIR) 

1712 self.root2 = makeTestTempDir(TESTDIR) 

1713 

1714 self.tmpConfigFile = self.root2 

1715 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1716 

1717 def testConfigExistence(self) -> None: 

1718 # Append the yaml file name, else the Config constructor does not 

1719 # know the file type. 

1720 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1721 super().testConfigExistence() 

1722 

1723 

1724class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1725 """Test that a config file created by makeRepo outside of repo works.""" 

1726 

1727 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1728 

1729 def setUp(self) -> None: 

1730 self.root = makeTestTempDir(TESTDIR) 

1731 self.root2 = makeTestTempDir(TESTDIR) 

1732 

1733 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1734 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1735 

1736 

1737@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1738class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1739 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1740 a local in-memory SqlRegistry. 

1741 """ 

1742 

1743 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1744 fullConfigKey = None 

1745 validationCanFail = True 

1746 

1747 bucketName = "anybucketname" 

1748 """Name of the Bucket that will be used in the tests. The name is read from 

1749 the config file used with the tests during set-up. 

1750 """ 

1751 

1752 root = "butlerRoot/" 

1753 """Root repository directory expected to be used in case useTempRoot=False. 

1754 Otherwise the root is set to a randomly generated 20-character string 

1755 during set-up. 

1756 """ 

1757 

1758 datastoreStr = [f"datastore={root}"] 

1759 """Contains all expected root locations in a format expected to be 

1760 returned by Butler stringification. 

1761 """ 

1762 

1763 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1764 """The expected format of the S3 Datastore string.""" 

1765 

1766 registryStr = "/gen3.sqlite3" 

1767 """Expected format of the Registry string.""" 

1768 

1769 mock_s3 = mock_s3() 

1770 """The mocked s3 interface from moto.""" 

1771 

1772 def genRoot(self) -> str: 

1773 """Returns a random string of len 20 to serve as a root 

1774 name for the temporary bucket repo. 

1775 

1776 This is equivalent to tempfile.mkdtemp as this is what self.root 

1777 becomes when useTempRoot is True. 

1778 """ 

1779 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1780 return rndstr + "/" 

1781 

1782 def setUp(self) -> None: 

1783 config = Config(self.configFile) 

1784 uri = ResourcePath(config[".datastore.datastore.root"]) 

1785 self.bucketName = uri.netloc 

1786 

1787 # Enable S3 mocking of tests. 

1788 self.mock_s3.start() 

1789 

1790 # Set up some fake credentials if they do not exist. 

1791 self.usingDummyCredentials = setAwsEnvCredentials() 

1792 

1793 if self.useTempRoot: 

1794 self.root = self.genRoot() 

1795 rooturi = f"s3://{self.bucketName}/{self.root}" 

1796 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1797 

1798 # Need a local folder to store the registry database. 

1799 self.reg_dir = makeTestTempDir(TESTDIR) 

1800 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1801 

1802 # Moto needs to know that we expect the bucket to exist 

1803 # (the name used to be the class attribute bucketName). 

1804 s3 = boto3.resource("s3") 

1805 s3.create_bucket(Bucket=self.bucketName) 

1806 

1807 self.datastoreStr = [f"datastore='{rooturi}'"] 

1808 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1809 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 
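
# posixpath (rather than os.path) is used below because rooturi is a
# URL, whose path component always uses forward slashes.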

1810 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1811 

1812 def tearDown(self) -> None: 

1813 s3 = boto3.resource("s3") 

1814 bucket = s3.Bucket(self.bucketName) 

1815 try: 

1816 bucket.objects.all().delete() 

1817 except botocore.exceptions.ClientError as e: 

1818 if e.response["Error"]["Code"] == "404": 

1819 # The key was not reachable, so there is nothing to delete. 

1820 pass 

1821 else: 

1822 raise 

1823 

1824 bucket = s3.Bucket(self.bucketName) 

1825 bucket.delete() 

1826 

1827 # Stop the S3 mock. 

1828 self.mock_s3.stop() 

1829 

1830 # Unset any dummy credentials that may have been set. 

1831 if self.usingDummyCredentials: 

1832 unsetAwsEnvCredentials() 

1833 

1834 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1835 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1836 

1837 if self.useTempRoot and os.path.exists(self.root): 

1838 shutil.rmtree(self.root, ignore_errors=True) 

1839 

1840 super().tearDown() 

1841 

1842 

1843class PosixDatastoreTransfers(unittest.TestCase): 

1844 """Test data transfers between butlers. 

1845 

1846 Different dataset ID managers are tested: UUID to UUID and integer 

1847 to integer. UUID to integer is not supported since we do not 

1848 currently want to allow that. Integer to UUID is supported, with 

1849 the caveat that a UUID4 will be generated, which is incorrect for 

1850 raw dataset types; the tests ignore that. 

1851 """ 

1852 

1853 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1854 storageClassFactory: StorageClassFactory 

1855 

1856 @classmethod 

1857 def setUpClass(cls) -> None: 

1858 cls.storageClassFactory = StorageClassFactory() 

1859 cls.storageClassFactory.addFromConfig(cls.configFile) 

1860 

1861 def setUp(self) -> None: 

1862 self.root = makeTestTempDir(TESTDIR) 

1863 self.config = Config(self.configFile) 

1864 

1865 def tearDown(self) -> None: 

1866 removeTestTempDir(self.root) 

1867 

1868 def create_butler(self, manager: str, label: str) -> Butler: 

1869 config = Config(self.configFile) 

1870 config["registry", "managers", "datasets"] = manager 

1871 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

1872 

1873 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None: 

1874 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID" 

1875 if manager1 is None: 

1876 manager1 = default 

1877 if manager2 is None: 

1878 manager2 = default 

1879 self.source_butler = self.create_butler(manager1, "1") 

1880 self.target_butler = self.create_butler(manager2, "2") 

1881 

1882 def testTransferUuidToUuid(self) -> None: 

1883 self.create_butlers() 

1884 self.assertButlerTransfers() 

1885 

1886 def _enable_trust(self, datastore: Datastore) -> None: 

1887 if hasattr(datastore, "trustGetRequest"): 

1888 datastore.trustGetRequest = True 

1889 elif hasattr(datastore, "datastores"): 

1890 for child in datastore.datastores: 

1891 if hasattr(child, "trustGetRequest"): 

1892 child.trustGetRequest = True 

1893 

1894 def testTransferMissing(self) -> None: 

1895 """Test transfers where datastore records are missing. 

1896 

1897 This is how execution butler works. 

1898 """ 

1899 self.create_butlers() 

1900 

1901 # Configure the source butler to allow trust. 

1902 self._enable_trust(self.source_butler.datastore) 

1903 

1904 self.assertButlerTransfers(purge=True) 

1905 

1906 def testTransferMissingDisassembly(self) -> None: 

1907 """Test transfers where datastore records are missing. 

1908 

1909 This is how execution butler works. 

1910 """ 

1911 self.create_butlers() 

1912 

1913 # Configure the source butler to allow trust. 

1914 self._enable_trust(self.source_butler.datastore) 

1915 

1916 # Test disassembly. 

1917 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

1918 

1919 def testAbsoluteURITransferDirect(self) -> None: 

1920 """Test transfer using an absolute URI.""" 

1921 self._absolute_transfer("auto") 

1922 

1923 def testAbsoluteURITransferCopy(self) -> None: 

1924 """Test transfer using an absolute URI.""" 

1925 self._absolute_transfer("copy") 

1926 

1927 def _absolute_transfer(self, transfer: str) -> None: 

1928 self.create_butlers() 

1929 

1930 storageClassName = "StructuredData" 

1931 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1932 datasetTypeName = "random_data" 

1933 run = "run1" 

1934 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1935 

1936 dimensions = self.source_butler.registry.dimensions.extract(()) 

1937 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

1938 self.source_butler.registry.registerDatasetType(datasetType) 

1939 

1940 metrics = makeExampleMetrics() 

1941 with ResourcePath.temporary_uri(suffix=".json") as temp: 

1942 dataId = DataCoordinate.makeEmpty(self.source_butler.dimensions) 

1943 source_refs = [DatasetRef(datasetType, dataId, run=run)] 

1944 temp.write(json.dumps(metrics.exportAsDict()).encode()) 

1945 dataset = FileDataset(path=temp, refs=source_refs) 

1946 self.source_butler.ingest(dataset, transfer="direct") 
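
# With transfer="direct" the file is not copied into the datastore;
# the datastore records simply point at the original absolute URI,
# which is what the "auto" assertion below relies on.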

1947 

1948 self.target_butler.transfer_from( 

1949 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer 

1950 ) 

1951 

1952 uri = self.target_butler.getURI(dataset.refs[0]) 

1953 if transfer == "auto": 

1954 self.assertEqual(uri, temp) 

1955 else: 

1956 self.assertNotEqual(uri, temp) 

1957 

1958 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None: 

1959 """Test that a run can be transferred to another butler.""" 

1960 

1961 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1962 datasetTypeName = "random_data" 

1963 

1964 # The test will create 3 collections, and we will want to transfer 

1965 # two of those three. 

1966 runs = ["run1", "run2", "other"] 

1967 

1968 # Also want to use two different dataset types to ensure that 

1969 # grouping works. 

1970 datasetTypeNames = ["random_data", "random_data_2"] 

1971 

1972 # Create the run collections in the source butler. 

1973 for run in runs: 

1974 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1975 

1976 # Create dimensions in source butler. 

1977 n_exposures = 30 

1978 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1979 self.source_butler.registry.insertDimensionData( 

1980 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1981 ) 

1982 self.source_butler.registry.insertDimensionData( 

1983 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

1984 ) 

1985 

1986 for i in range(n_exposures): 

1987 self.source_butler.registry.insertDimensionData( 

1988 "exposure", 

1989 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

1990 ) 

1991 

1992 # Create dataset types in the source butler. 

1993 dimensions = self.source_butler.registry.dimensions.extract(["instrument", "exposure"]) 

1994 for datasetTypeName in datasetTypeNames: 

1995 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

1996 self.source_butler.registry.registerDatasetType(datasetType) 

1997 

1998 # Write a dataset to an unrelated run -- this will ensure that 

1999 # we are rewriting integer dataset IDs in the target if necessary. 

2000 # This is not relevant for UUIDs. 

2001 run = "distraction" 

2002 butler = Butler(butler=self.source_butler, run=run) 

2003 butler.put( 

2004 makeExampleMetrics(), 

2005 datasetTypeName, 

2006 exposure=1, 

2007 instrument="DummyCamComp", 

2008 physical_filter="d-r", 

2009 ) 

2010 

2011 # Write some example metrics to the source butler. 

2012 butler = Butler(butler=self.source_butler) 

2013 

2014 # Set of DatasetRefs that should be in the list of refs to transfer 

2015 # but which will not be transferred. 

2016 deleted: set[DatasetRef] = set() 

2017 

2018 n_expected = 20 # Number of datasets expected to be transferred 

2019 source_refs = [] 

2020 for i in range(n_exposures): 

2021 # Put a third of the datasets into each collection; only retain 

2022 # two thirds. 

2023 index = i % 3 

2024 run = runs[index] 

2025 datasetTypeName = datasetTypeNames[i % 2] 

2026 

2027 metric = MetricsExample( 

2028 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)] 

2029 ) 

2030 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2031 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2032 

2033 # Remove the datastore record using the low-level API. 

2034 if purge: 

2035 # Remove records for a fraction. 

2036 if index == 1: 

2037 # For one of these delete the file as well. 

2038 # This allows the "missing" code to filter the 

2039 # file out. 

2040 # Access the individual datastores. 

2041 datastores = [] 

2042 if hasattr(butler.datastore, "datastores"): 

2043 datastores.extend(butler.datastore.datastores) 

2044 else: 

2045 datastores.append(butler.datastore) 

2046 

2047 if not deleted: 

2048 # For a chained datastore we need to remove 

2049 # files from each datastore in the chain. 

2050 for datastore in datastores: 

2051 # The file might not be known to the datastore 

2052 # if constraints are used. 

2053 try: 

2054 primary, uris = datastore.getURIs(ref) 

2055 except FileNotFoundError: 

2056 continue 

2057 if primary: 

2058 if primary.scheme != "mem": 

2059 primary.remove() 

2060 for uri in uris.values(): 

2061 if uri.scheme != "mem": 

2062 uri.remove() 

2063 n_expected -= 1 

2064 deleted.add(ref) 

2065 

2066 # Remove the datastore record. 

2067 for datastore in datastores: 

2068 if hasattr(datastore, "removeStoredItemInfo"): 

2069 datastore.removeStoredItemInfo(ref) 

2070 

2071 if index < 2: 

2072 source_refs.append(ref) 

2073 if ref not in deleted: 

2074 new_metric = butler.get(ref) 

2075 self.assertEqual(new_metric, metric) 

2076 

2077 # Create some bad dataset types to ensure we check for inconsistent 

2078 # definitions. 

2079 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2080 for datasetTypeName in datasetTypeNames: 

2081 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2082 self.target_butler.registry.registerDatasetType(datasetType) 

2083 with self.assertRaises(ConflictingDefinitionError) as cm: 

2084 self.target_butler.transfer_from(self.source_butler, source_refs) 

2085 self.assertIn("dataset type differs", str(cm.exception)) 

2086 

2087 # And remove the bad definitions. 

2088 for datasetTypeName in datasetTypeNames: 

2089 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2090 

2091 # Transfer without creating dataset types should fail. 

2092 with self.assertRaises(KeyError): 

2093 self.target_butler.transfer_from(self.source_butler, source_refs) 

2094 

2095 # Transfer without creating dimensions should fail. 

2096 with self.assertRaises(ConflictingDefinitionError) as cm: 

2097 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True) 

2098 self.assertIn("dimension", str(cm.exception)) 

2099 

2100 # The failed transfer above leaves registry in an inconsistent 

2101 # state because the run is created but then rolled back without 

2102 # the collection cache being cleared. For now force a refresh. 

2103 # Can remove with DM-35498. 

2104 self.target_butler.registry.refresh() 

2105 

2106 # Now transfer them to the second butler, including dimensions. 

2107 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2108 transferred = self.target_butler.transfer_from( 

2109 self.source_butler, 

2110 source_refs, 

2111 register_dataset_types=True, 

2112 transfer_dimensions=True, 

2113 ) 

2114 self.assertEqual(len(transferred), n_expected) 

2115 log_output = ";".join(log_cm.output) 

2116 

2117 # A ChainedDatastore will use the in-memory datastore for mexists, 

2118 # so we cannot rely on the mexists log message. 

2119 self.assertIn("Number of datastore records found in source", log_output) 

2120 self.assertIn("Creating output run", log_output) 

2121 

2122 # Do the transfer twice to ensure that it will do nothing extra. 

2123 # Only do this if purge=True because it does not work for int 

2124 # dataset_id. 

2125 if purge: 

2126 # This should not need to register dataset types. 

2127 transferred = self.target_butler.transfer_from(self.source_butler, source_refs) 

2128 self.assertEqual(len(transferred), n_expected) 

2129 

2130 # Also do an explicit low-level transfer to trigger some 

2131 # edge cases. 

2132 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2133 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2134 log_output = ";".join(log_cm.output) 

2135 self.assertIn("no file artifacts exist", log_output) 

2136 

2137 with self.assertRaises((TypeError, AttributeError)): 

2138 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) # type: ignore 

2139 

2140 with self.assertRaises(ValueError): 

2141 self.target_butler.datastore.transfer_from( 

2142 self.source_butler.datastore, source_refs, transfer="split" 

2143 ) 

2144 

2145 # Now try to get the same refs from the new butler. 

2146 for ref in source_refs: 

2147 if ref not in deleted: 

2148 new_metric = self.target_butler.get(ref) 

2149 old_metric = self.source_butler.get(ref) 

2150 self.assertEqual(new_metric, old_metric) 

2151 

2152 # Now prune the run2 collection and create a CHAINED collection instead. 

2153 # This should block the transfer. 

2154 self.target_butler.removeRuns(["run2"], unstore=True) 

2155 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2156 with self.assertRaises(CollectionTypeError): 

2157 # Re-importing the run1 datasets can be problematic if they 

2158 # use integer IDs, so filter those out. 

2159 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2160 self.target_butler.transfer_from(self.source_butler, to_transfer) 

2161 

2162 

2163class ChainedDatastoreTransfers(PosixDatastoreTransfers): 
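"""Chained-datastore version of the data transfer tests."""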

2164 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2165 

2166 

2167if __name__ == "__main__": 

2168 unittest.main()