Coverage for tests/test_butler.py: 13%

1174 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-02 02:16 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Tests for Butler. 

23""" 

24from __future__ import annotations 

25 

26import gc 

27import json 

28import logging 

29import os 

30import pathlib 

31import pickle 

32import posixpath 

33import random 

34import shutil 

35import string 

36import tempfile 

37import unittest 

38import uuid 

39from collections.abc import Mapping 

40from typing import TYPE_CHECKING, Any, cast 

41 

42try: 

43 import boto3 

44 import botocore 

45 from moto import mock_s3 # type: ignore[import] 

46except ImportError: 

47 boto3 = None 

48 

49 def mock_s3(cls): 

50 """A no-op decorator in case moto mock_s3 can not be imported.""" 

51 return cls 

52 

53 

54try: 

55 # It's possible but silly to have testing.postgresql installed without 

56 # having the postgresql server installed (because then nothing in 

57 # testing.postgresql would work), so we use the presence of that module 

58 # to test whether we can expect the server to be available. 

59 import testing.postgresql # type: ignore[import] 

60except ImportError: 

61 testing = None 

62 

63import astropy.time 

64import sqlalchemy 

65from lsst.daf.butler import ( 

66 Butler, 

67 ButlerConfig, 

68 CollectionType, 

69 Config, 

70 DataCoordinate, 

71 DatasetRef, 

72 DatasetType, 

73 FileDataset, 

74 FileTemplate, 

75 FileTemplateValidationError, 

76 StorageClassFactory, 

77 ValidationError, 

78 script, 

79) 

80from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG 

81from lsst.daf.butler.datastores.fileDatastore import FileDatastore 

82from lsst.daf.butler.registries.sql import SqlRegistry 

83from lsst.daf.butler.registry import ( 

84 CollectionError, 

85 CollectionTypeError, 

86 ConflictingDefinitionError, 

87 DataIdValueError, 

88 MissingCollectionError, 

89 OrphanedRecordError, 

90) 

91from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter 

92from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir 

93from lsst.resources import ResourcePath 

94from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials 

95from lsst.utils import doImportType 

96from lsst.utils.ellipsis import Ellipsis 

97from lsst.utils.introspection import get_full_type_name 

98 

99if TYPE_CHECKING: 

100 from lsst.daf.butler import Datastore, DimensionGraph, Registry, StorageClass 

101 

102TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

103 

104 

105def makeExampleMetrics(): 

106 return MetricsExample( 

107 {"AM1": 5.2, "AM2": 30.6}, 

108 {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}}, 

109 [563, 234, 456.7, 752, 8, 9, 27], 

110 ) 

111 

112 

113class TransactionTestError(Exception): 

114 """Specific error for testing transactions, to prevent misdiagnosing 

115 that might otherwise occur when a standard exception is used. 

116 """ 

117 

118 pass 

119 

120 

121class ButlerConfigTests(unittest.TestCase): 

122 """Simple tests for ButlerConfig that are not tested in any other test 

123 cases.""" 

124 

125 def testSearchPath(self): 

126 configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml") 

127 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm: 

128 config1 = ButlerConfig(configFile) 

129 self.assertNotIn("testConfigs", "\n".join(cm.output)) 

130 

131 overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs") 

132 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm: 

133 config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory]) 

134 self.assertIn("testConfigs", "\n".join(cm.output)) 

135 

136 key = ("datastore", "records", "table") 

137 self.assertNotEqual(config1[key], config2[key]) 

138 self.assertEqual(config2[key], "override_record") 

139 

140 

141class ButlerPutGetTests(TestCaseMixin): 

142 """Helper method for running a suite of put/get tests from different 

143 butler configurations.""" 

144 

145 root: str 

146 default_run = "ingésτ😺" 

147 storageClassFactory: StorageClassFactory 

148 configFile: str 

149 tmpConfigFile: str 

150 

151 @staticmethod 

152 def addDatasetType( 

153 datasetTypeName: str, dimensions: DimensionGraph, storageClass: StorageClass | str, registry: Registry 

154 ) -> DatasetType: 

155 """Create a DatasetType and register it""" 

156 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

157 registry.registerDatasetType(datasetType) 

158 return datasetType 

159 

160 @classmethod 

161 def setUpClass(cls) -> None: 

162 cls.storageClassFactory = StorageClassFactory() 

163 cls.storageClassFactory.addFromConfig(cls.configFile) 

164 

165 def assertGetComponents(self, butler, datasetRef, components, reference, collections=None) -> None: 

166 datasetType = datasetRef.datasetType 

167 dataId = datasetRef.dataId 

168 deferred = butler.getDeferred(datasetRef) 

169 

170 for component in components: 

171 compTypeName = datasetType.componentTypeName(component) 

172 result = butler.get(compTypeName, dataId, collections=collections) 

173 self.assertEqual(result, getattr(reference, component)) 

174 result_deferred = deferred.get(component=component) 

175 self.assertEqual(result_deferred, result) 

176 

177 def tearDown(self) -> None: 

178 removeTestTempDir(self.root) 

179 

180 def create_butler( 

181 self, run: str, storageClass: StorageClass | str, datasetTypeName: str 

182 ) -> tuple[Butler, DatasetType]: 

183 butler = Butler(self.tmpConfigFile, run=run) 

184 

185 collections = set(butler.registry.queryCollections()) 

186 self.assertEqual(collections, set([run])) 

187 

188 # Create and register a DatasetType 

189 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

190 

191 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

192 

193 # Add needed Dimensions 

194 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

195 butler.registry.insertDimensionData( 

196 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

197 ) 

198 butler.registry.insertDimensionData( 

199 "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"} 

200 ) 

201 visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai") 

202 visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai") 

203 butler.registry.insertDimensionData( 

204 "visit", 

205 { 

206 "instrument": "DummyCamComp", 

207 "id": 423, 

208 "name": "fourtwentythree", 

209 "physical_filter": "d-r", 

210 "visit_system": 1, 

211 "datetime_begin": visit_start, 

212 "datetime_end": visit_end, 

213 }, 

214 ) 

215 

216 # Add more visits for some later tests 

217 for visit_id in (424, 425): 

218 butler.registry.insertDimensionData( 

219 "visit", 

220 { 

221 "instrument": "DummyCamComp", 

222 "id": visit_id, 

223 "name": f"fourtwentyfour_{visit_id}", 

224 "physical_filter": "d-r", 

225 "visit_system": 1, 

226 }, 

227 ) 

228 return butler, datasetType 

229 

230 def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Butler: 

231 # New datasets will be added to run and tag, but we will only look in 

232 # tag when looking up datasets. 

233 run = self.default_run 

234 butler, datasetType = self.create_butler(run, storageClass, datasetTypeName) 

235 assert butler.run is not None 

236 

237 # Create and store a dataset 

238 metric = makeExampleMetrics() 

239 dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423}) 

240 

241 # Put and remove the dataset once as a DatasetRef, once as a dataId, 

242 # and once with a DatasetType 

243 

244 # Keep track of any collections we add and do not clean up 

245 expected_collections = {run} 

246 

247 counter = 0 

248 ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1") 

249 args = tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate] 

250 for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)): 

251 # Since we are using subTest we can get cascading failures 

252 # here with the first attempt failing and the others failing 

253 # immediately because the dataset already exists. Work around 

254 # this by using a distinct run collection each time 

255 counter += 1 

256 this_run = f"put_run_{counter}" 

257 butler.registry.registerCollection(this_run, type=CollectionType.RUN) 

258 expected_collections.update({this_run}) 

259 

260 with self.subTest(args=args): 

261 ref = butler.put(metric, *args, run=this_run) 

262 self.assertIsInstance(ref, DatasetRef) 

263 

264 # Test getDirect 

265 metricOut = butler.get(ref) 

266 self.assertEqual(metric, metricOut) 

267 # Test get 

268 metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run) 

269 self.assertEqual(metric, metricOut) 

270 # Test get with a datasetRef 

271 metricOut = butler.get(ref, collections=this_run) 

272 self.assertEqual(metric, metricOut) 

273 # Test getDeferred with dataId 

274 metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get() 

275 self.assertEqual(metric, metricOut) 

276 # Test getDeferred with a datasetRef 

277 metricOut = butler.getDeferred(ref, collections=this_run).get() 

278 self.assertEqual(metric, metricOut) 

279 # and deferred direct with ref 

280 metricOut = butler.getDeferred(ref).get() 

281 self.assertEqual(metric, metricOut) 

282 

283 # Check we can get components 

284 if storageClass.isComposite(): 

285 self.assertGetComponents( 

286 butler, ref, ("summary", "data", "output"), metric, collections=this_run 

287 ) 

288 

289 # Can the artifacts themselves be retrieved? 

290 if not butler.datastore.isEphemeral: 

291 root_uri = ResourcePath(self.root) 

292 

293 for preserve_path in (True, False): 

294 destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/") 

295 # Use copy so that we can test that overwrite 

296 # protection works (using "auto" for File URIs would 

297 # use hard links and subsequent transfer would work 

298 # because it knows they are the same file). 

299 transferred = butler.retrieveArtifacts( 

300 [ref], destination, preserve_path=preserve_path, transfer="copy" 

301 ) 

302 self.assertGreater(len(transferred), 0) 

303 artifacts = list(ResourcePath.findFileResources([destination])) 

304 self.assertEqual(set(transferred), set(artifacts)) 

305 

306 for artifact in transferred: 

307 path_in_destination = artifact.relative_to(destination) 

308 self.assertIsNotNone(path_in_destination) 

309 assert path_in_destination is not None 

310 

311 # when path is not preserved there should not be 

312 # any path separators. 

313 num_seps = path_in_destination.count("/") 

314 if preserve_path: 

315 self.assertGreater(num_seps, 0) 

316 else: 

317 self.assertEqual(num_seps, 0) 

318 

319 primary_uri, secondary_uris = butler.datastore.getURIs(ref) 

320 n_uris = len(secondary_uris) 

321 if primary_uri: 

322 n_uris += 1 

323 self.assertEqual( 

324 len(artifacts), 

325 n_uris, 

326 "Comparing expected artifacts vs actual:" 

327 f" {artifacts} vs {primary_uri} and {secondary_uris}", 

328 ) 

329 

330 if preserve_path: 

331 # No need to run these twice 

332 with self.assertRaises(ValueError): 

333 butler.retrieveArtifacts([ref], destination, transfer="move") 

334 

335 with self.assertRaises(FileExistsError): 

336 butler.retrieveArtifacts([ref], destination) 

337 

338 transferred_again = butler.retrieveArtifacts( 

339 [ref], destination, preserve_path=preserve_path, overwrite=True 

340 ) 

341 self.assertEqual(set(transferred_again), set(transferred)) 

342 

343 # Now remove the dataset completely. 

344 butler.pruneDatasets([ref], purge=True, unstore=True) 

345 # Lookup with original args should still fail. 

346 with self.assertRaises(LookupError): 

347 butler.datasetExists(*args, collections=this_run) 

348 # get() should still fail. 

349 with self.assertRaises(FileNotFoundError): 

350 butler.get(ref) 

351 # Registry shouldn't be able to find it by dataset_id anymore. 

352 self.assertIsNone(butler.registry.getDataset(ref.id)) 

353 

354 # Do explicit registry removal since we know they are 

355 # empty 

356 butler.registry.removeCollection(this_run) 

357 expected_collections.remove(this_run) 

358 

359 # Create DatasetRef for put using default run. 

360 refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run) 

361 

362 # Put the dataset again, since the last thing we did was remove it 

363 # and we want to use the default collection. 

364 ref = butler.put(metric, refIn) 

365 

366 # Get with parameters 

367 stop = 4 

368 sliced = butler.get(ref, parameters={"slice": slice(stop)}) 

369 self.assertNotEqual(metric, sliced) 

370 self.assertEqual(metric.summary, sliced.summary) 

371 self.assertEqual(metric.output, sliced.output) 

372 self.assertEqual(metric.data[:stop], sliced.data) 

373 # getDeferred with parameters 

374 sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get() 

375 self.assertNotEqual(metric, sliced) 

376 self.assertEqual(metric.summary, sliced.summary) 

377 self.assertEqual(metric.output, sliced.output) 

378 self.assertEqual(metric.data[:stop], sliced.data) 

379 # getDeferred with deferred parameters 

380 sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)}) 

381 self.assertNotEqual(metric, sliced) 

382 self.assertEqual(metric.summary, sliced.summary) 

383 self.assertEqual(metric.output, sliced.output) 

384 self.assertEqual(metric.data[:stop], sliced.data) 

385 

386 if storageClass.isComposite(): 

387 # Check that components can be retrieved 

388 metricOut = butler.get(ref.datasetType.name, dataId) 

389 compNameS = ref.datasetType.componentTypeName("summary") 

390 compNameD = ref.datasetType.componentTypeName("data") 

391 summary = butler.get(compNameS, dataId) 

392 self.assertEqual(summary, metric.summary) 

393 data = butler.get(compNameD, dataId) 

394 self.assertEqual(data, metric.data) 

395 

396 if "counter" in storageClass.derivedComponents: 

397 count = butler.get(ref.datasetType.componentTypeName("counter"), dataId) 

398 self.assertEqual(count, len(data)) 

399 

400 count = butler.get( 

401 ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)} 

402 ) 

403 self.assertEqual(count, stop) 

404 

405 compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections) 

406 assert compRef is not None 

407 summary = butler.get(compRef) 

408 self.assertEqual(summary, metric.summary) 

409 

410 # Create a Dataset type that has the same name but is inconsistent. 

411 inconsistentDatasetType = DatasetType( 

412 datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config") 

413 ) 

414 

415 # Getting with a dataset type that does not match registry fails 

416 with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"): 

417 butler.get(inconsistentDatasetType, dataId) 

418 

419 # Combining a DatasetRef with a dataId should fail 

420 with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"): 

421 butler.get(ref, dataId) 

422 # Getting with an explicit ref should fail if the id doesn't match. 

423 with self.assertRaises(FileNotFoundError): 

424 butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run)) 

425 

426 # Getting a dataset with unknown parameters should fail 

427 with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"): 

428 butler.get(ref, parameters={"unsupported": True}) 

429 

430 # Check we have a collection 

431 collections = set(butler.registry.queryCollections()) 

432 self.assertEqual(collections, expected_collections) 

433 

434 # Clean up to check that we can remove something that may have 

435 # already had a component removed 

436 butler.pruneDatasets([ref], unstore=True, purge=True) 

437 

438 # Add the same ref again, so we can check that duplicate put fails. 

439 ref = butler.put(metric, datasetType, dataId) 

440 

441 # Repeat put will fail. 

442 with self.assertRaisesRegex( 

443 ConflictingDefinitionError, "A database constraint failure was triggered" 

444 ): 

445 butler.put(metric, datasetType, dataId) 

446 

447 # Remove the datastore entry. 

448 butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False) 

449 

450 # Put will still fail 

451 with self.assertRaisesRegex( 

452 ConflictingDefinitionError, "A database constraint failure was triggered" 

453 ): 

454 butler.put(metric, datasetType, dataId) 

455 

456 # Repeat the same sequence with resolved ref. 

457 butler.pruneDatasets([ref], unstore=True, purge=True) 

458 ref = butler.put(metric, refIn) 

459 

460 # Repeat put will fail. 

461 with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"): 

462 butler.put(metric, refIn) 

463 

464 # Remove the datastore entry. 

465 butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False) 

466 

467 # In case of resolved ref this write will succeed. 

468 ref = butler.put(metric, refIn) 

469 

470 # Leave the dataset in place since some downstream tests require 

471 # something to be present 

472 

473 return butler 

474 

475 def testDeferredCollectionPassing(self) -> None: 

476 # Construct a butler with no run or collection, but make it writeable. 

477 butler = Butler(self.tmpConfigFile, writeable=True) 

478 # Create and register a DatasetType 

479 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

480 datasetType = self.addDatasetType( 

481 "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry 

482 ) 

483 # Add needed Dimensions 

484 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

485 butler.registry.insertDimensionData( 

486 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

487 ) 

488 butler.registry.insertDimensionData( 

489 "visit", 

490 {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"}, 

491 ) 

492 dataId = {"instrument": "DummyCamComp", "visit": 423} 

493 # Create dataset. 

494 metric = makeExampleMetrics() 

495 # Register a new run and put dataset. 

496 run = "deferred" 

497 self.assertTrue(butler.registry.registerRun(run)) 

498 # Second time it will be allowed but indicate no-op 

499 self.assertFalse(butler.registry.registerRun(run)) 

500 ref = butler.put(metric, datasetType, dataId, run=run) 

501 # Putting with no run should fail with TypeError. 

502 with self.assertRaises(CollectionError): 

503 butler.put(metric, datasetType, dataId) 

504 # Dataset should exist. 

505 self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run])) 

506 # We should be able to get the dataset back, but with and without 

507 # a deferred dataset handle. 

508 self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run])) 

509 self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get()) 

510 # Trying to find the dataset without any collection is a TypeError. 

511 with self.assertRaises(CollectionError): 

512 butler.datasetExists(datasetType, dataId) 

513 with self.assertRaises(CollectionError): 

514 butler.get(datasetType, dataId) 

515 # Associate the dataset with a different collection. 

516 butler.registry.registerCollection("tagged") 

517 butler.registry.associate("tagged", [ref]) 

518 # Deleting the dataset from the new collection should make it findable 

519 # in the original collection. 

520 butler.pruneDatasets([ref], tags=["tagged"]) 

521 self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run])) 

522 

523 

524class ButlerTests(ButlerPutGetTests): 

525 """Tests for Butler.""" 

526 

527 useTempRoot = True 

528 validationCanFail: bool 

529 fullConfigKey: str | None 

530 registryStr: str | None 

531 datastoreName: list[str] | None 

532 datastoreStr: list[str] 

533 

534 def setUp(self) -> None: 

535 """Create a new butler root for each test.""" 

536 self.root = makeTestTempDir(TESTDIR) 

537 Butler.makeRepo(self.root, config=Config(self.configFile)) 

538 self.tmpConfigFile = os.path.join(self.root, "butler.yaml") 

539 

540 def testConstructor(self) -> None: 

541 """Independent test of constructor.""" 

542 butler = Butler(self.tmpConfigFile, run=self.default_run) 

543 self.assertIsInstance(butler, Butler) 

544 

545 # Check that butler.yaml is added automatically. 

546 if self.tmpConfigFile.endswith(end := "/butler.yaml"): 

547 config_dir = self.tmpConfigFile[: -len(end)] 

548 butler = Butler(config_dir, run=self.default_run) 

549 self.assertIsInstance(butler, Butler) 

550 

551 # Even with a ResourcePath. 

552 butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run) 

553 self.assertIsInstance(butler, Butler) 

554 

555 collections = set(butler.registry.queryCollections()) 

556 self.assertEqual(collections, {self.default_run}) 

557 

558 # Check that some special characters can be included in run name. 

559 special_run = "u@b.c-A" 

560 butler_special = Butler(butler=butler, run=special_run) 

561 collections = set(butler_special.registry.queryCollections("*@*")) 

562 self.assertEqual(collections, {special_run}) 

563 

564 butler2 = Butler(butler=butler, collections=["other"]) 

565 self.assertEqual(butler2.collections, ("other",)) 

566 self.assertIsNone(butler2.run) 

567 self.assertIs(butler.datastore, butler2.datastore) 

568 

569 # Test that we can use an environment variable to find this 

570 # repository. 

571 butler_index = Config() 

572 butler_index["label"] = self.tmpConfigFile 

573 for suffix in (".yaml", ".json"): 

574 # Ensure that the content differs so that we know that 

575 # we aren't reusing the cache. 

576 bad_label = f"s3://bucket/not_real{suffix}" 

577 butler_index["bad_label"] = bad_label 

578 with ResourcePath.temporary_uri(suffix=suffix) as temp_file: 

579 butler_index.dumpToUri(temp_file) 

580 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}): 

581 self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label"))) 

582 uri = Butler.get_repo_uri("bad_label") 

583 self.assertEqual(uri, ResourcePath(bad_label)) 

584 uri = Butler.get_repo_uri("label") 

585 butler = Butler(uri, writeable=False) 

586 self.assertIsInstance(butler, Butler) 

587 butler = Butler("label", writeable=False) 

588 self.assertIsInstance(butler, Butler) 

589 with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"): 

590 Butler("not_there", writeable=False) 

591 with self.assertRaises(KeyError) as cm: 

592 Butler.get_repo_uri("missing") 

593 self.assertEqual(Butler.get_repo_uri("missing", True), ResourcePath("missing")) 

594 self.assertIn("not known to", str(cm.exception)) 

595 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}): 

596 with self.assertRaises(FileNotFoundError): 

597 Butler.get_repo_uri("label") 

598 self.assertEqual(Butler.get_known_repos(), set()) 

599 with self.assertRaises(KeyError) as cm: 

600 # No environment variable set. 

601 Butler.get_repo_uri("label") 

602 self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label")) 

603 self.assertIn("No repository index defined", str(cm.exception)) 

604 with self.assertRaisesRegex(FileNotFoundError, "no known aliases"): 

605 # No aliases registered. 

606 Butler("not_there") 

607 self.assertEqual(Butler.get_known_repos(), set()) 

608 

609 def testBasicPutGet(self) -> None: 

610 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

611 self.runPutGetTest(storageClass, "test_metric") 

612 

613 def testCompositePutGetConcrete(self) -> None: 

614 storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly") 

615 butler = self.runPutGetTest(storageClass, "test_metric") 

616 

617 # Should *not* be disassembled 

618 datasets = list(butler.registry.queryDatasets(..., collections=self.default_run)) 

619 self.assertEqual(len(datasets), 1) 

620 uri, components = butler.getURIs(datasets[0]) 

621 self.assertIsInstance(uri, ResourcePath) 

622 self.assertFalse(components) 

623 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}") 

624 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}") 

625 

626 # Predicted dataset 

627 dataId = {"instrument": "DummyCamComp", "visit": 424} 

628 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True) 

629 self.assertFalse(components) 

630 self.assertIsInstance(uri, ResourcePath) 

631 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}") 

632 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}") 

633 

634 def testCompositePutGetVirtual(self) -> None: 

635 storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp") 

636 butler = self.runPutGetTest(storageClass, "test_metric_comp") 

637 

638 # Should be disassembled 

639 datasets = list(butler.registry.queryDatasets(..., collections=self.default_run)) 

640 self.assertEqual(len(datasets), 1) 

641 uri, components = butler.getURIs(datasets[0]) 

642 

643 if butler.datastore.isEphemeral: 

644 # Never disassemble in-memory datastore 

645 self.assertIsInstance(uri, ResourcePath) 

646 self.assertFalse(components) 

647 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}") 

648 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}") 

649 else: 

650 self.assertIsNone(uri) 

651 self.assertEqual(set(components), set(storageClass.components)) 

652 for compuri in components.values(): 

653 self.assertIsInstance(compuri, ResourcePath) 

654 self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}") 

655 self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}") 

656 

657 # Predicted dataset 

658 dataId = {"instrument": "DummyCamComp", "visit": 424} 

659 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True) 

660 

661 if butler.datastore.isEphemeral: 

662 # Never disassembled 

663 self.assertIsInstance(uri, ResourcePath) 

664 self.assertFalse(components) 

665 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}") 

666 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}") 

667 else: 

668 self.assertIsNone(uri) 

669 self.assertEqual(set(components), set(storageClass.components)) 

670 for compuri in components.values(): 

671 self.assertIsInstance(compuri, ResourcePath) 

672 self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}") 

673 self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}") 

674 

675 def testStorageClassOverrideGet(self) -> None: 

676 """Test storage class conversion on get with override.""" 

677 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

678 datasetTypeName = "anything" 

679 run = self.default_run 

680 

681 butler, datasetType = self.create_butler(run, storageClass, datasetTypeName) 

682 

683 # Create and store a dataset. 

684 metric = makeExampleMetrics() 

685 dataId = {"instrument": "DummyCamComp", "visit": 423} 

686 

687 ref = butler.put(metric, datasetType, dataId) 

688 

689 # Return native type. 

690 retrieved = butler.get(ref) 

691 self.assertEqual(retrieved, metric) 

692 

693 # Specify an override. 

694 new_sc = self.storageClassFactory.getStorageClass("MetricsConversion") 

695 model = butler.get(ref, storageClass=new_sc) 

696 self.assertNotEqual(type(model), type(retrieved)) 

697 self.assertIs(type(model), new_sc.pytype) 

698 self.assertEqual(retrieved, model) 

699 

700 # Defer but override later. 

701 deferred = butler.getDeferred(ref) 

702 model = deferred.get(storageClass=new_sc) 

703 self.assertIs(type(model), new_sc.pytype) 

704 self.assertEqual(retrieved, model) 

705 

706 # Defer but override up front. 

707 deferred = butler.getDeferred(ref, storageClass=new_sc) 

708 model = deferred.get() 

709 self.assertIs(type(model), new_sc.pytype) 

710 self.assertEqual(retrieved, model) 

711 

712 # Retrieve a component. Should be a tuple. 

713 data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple") 

714 self.assertIs(type(data), tuple) 

715 self.assertEqual(data, tuple(retrieved.data)) 

716 

717 # Parameter on the write storage class should work regardless 

718 # of read storage class. 

719 data = butler.get( 

720 "anything.data", 

721 dataId, 

722 storageClass="StructuredDataDataTestTuple", 

723 parameters={"slice": slice(2, 4)}, 

724 ) 

725 self.assertEqual(len(data), 2) 

726 

727 # Try a parameter that is known to the read storage class but not 

728 # the write storage class. 

729 with self.assertRaises(KeyError): 

730 butler.get( 

731 "anything.data", 

732 dataId, 

733 storageClass="StructuredDataDataTestTuple", 

734 parameters={"xslice": slice(2, 4)}, 

735 ) 

736 

737 def testPytypePutCoercion(self) -> None: 

738 """Test python type coercion on Butler.get and put.""" 

739 

740 # Store some data with the normal example storage class. 

741 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

742 datasetTypeName = "test_metric" 

743 butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName) 

744 

745 dataId = {"instrument": "DummyCamComp", "visit": 423} 

746 

747 # Put a dict and this should coerce to a MetricsExample 

748 test_dict = {"summary": {"a": 1}, "output": {"b": 2}} 

749 metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424) 

750 test_metric = butler.get(metric_ref) 

751 self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample") 

752 self.assertEqual(test_metric.summary, test_dict["summary"]) 

753 self.assertEqual(test_metric.output, test_dict["output"]) 

754 

755 # Check that the put still works if a DatasetType is given with 

756 # a definition matching this python type. 

757 registry_type = butler.registry.getDatasetType(datasetTypeName) 

758 this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson") 

759 metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425) 

760 self.assertEqual(metric2_ref.datasetType, registry_type) 

761 

762 # The get will return the type expected by registry. 

763 test_metric2 = butler.get(metric2_ref) 

764 self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample") 

765 

766 # Make a new DatasetRef with the compatible but different DatasetType. 

767 # This should now return a dict. 

768 new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run) 

769 test_dict2 = butler.get(new_ref) 

770 self.assertEqual(get_full_type_name(test_dict2), "dict") 

771 

772 # Get it again with the wrong dataset type definition using get() 

773 # rather than get(). This should be consistent with get() 

774 # behavior and return the type of the DatasetType. 

775 test_dict3 = butler.get(this_type, dataId=dataId, visit=425) 

776 self.assertEqual(get_full_type_name(test_dict3), "dict") 

777 

778 def testIngest(self) -> None: 

779 butler = Butler(self.tmpConfigFile, run=self.default_run) 

780 

781 # Create and register a DatasetType 

782 dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"]) 

783 

784 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml") 

785 datasetTypeName = "metric" 

786 

787 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

788 

789 # Add needed Dimensions 

790 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

791 butler.registry.insertDimensionData( 

792 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

793 ) 

794 for detector in (1, 2): 

795 butler.registry.insertDimensionData( 

796 "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"} 

797 ) 

798 

799 butler.registry.insertDimensionData( 

800 "visit", 

801 {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"}, 

802 {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"}, 

803 ) 

804 

805 formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter") 

806 dataRoot = os.path.join(TESTDIR, "data", "basic") 

807 datasets = [] 

808 for detector in (1, 2): 

809 detector_name = f"detector_{detector}" 

810 metricFile = os.path.join(dataRoot, f"{detector_name}.yaml") 

811 dataId = butler.registry.expandDataId( 

812 {"instrument": "DummyCamComp", "visit": 423, "detector": detector} 

813 ) 

814 # Create a DatasetRef for ingest 

815 refIn = DatasetRef(datasetType, dataId, run=self.default_run) 

816 

817 datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter)) 

818 

819 butler.ingest(*datasets, transfer="copy") 

820 

821 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423} 

822 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423} 

823 

824 metrics1 = butler.get(datasetTypeName, dataId1) 

825 metrics2 = butler.get(datasetTypeName, dataId2) 

826 self.assertNotEqual(metrics1, metrics2) 

827 

828 # Compare URIs 

829 uri1 = butler.getURI(datasetTypeName, dataId1) 

830 uri2 = butler.getURI(datasetTypeName, dataId2) 

831 self.assertNotEqual(uri1, uri2) 

832 

833 # Now do a multi-dataset but single file ingest 

834 metricFile = os.path.join(dataRoot, "detectors.yaml") 

835 refs = [] 

836 for detector in (1, 2): 

837 detector_name = f"detector_{detector}" 

838 dataId = butler.registry.expandDataId( 

839 {"instrument": "DummyCamComp", "visit": 424, "detector": detector} 

840 ) 

841 # Create a DatasetRef for ingest 

842 refs.append(DatasetRef(datasetType, dataId, run=self.default_run)) 

843 

844 # Test "move" transfer to ensure that the files themselves 

845 # have disappeared following ingest. 

846 with ResourcePath.temporary_uri(suffix=".yaml") as tempFile: 

847 tempFile.transfer_from(ResourcePath(metricFile), transfer="copy") 

848 

849 datasets = [] 

850 datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter)) 

851 

852 # For first ingest use copy. 

853 butler.ingest(*datasets, transfer="copy", record_validation_info=False) 

854 

855 # Now try to ingest again in "execution butler" mode where 

856 # the registry entries exist but the datastore does not have 

857 # the files. We also need to strip the dimension records to ensure 

858 # that they will be re-added by the ingest. 

859 ref = datasets[0].refs[0] 

860 datasets[0].refs = [ 

861 cast( 

862 DatasetRef, 

863 butler.registry.findDataset(ref.datasetType, dataId=ref.dataId, collections=ref.run), 

864 ) 

865 for ref in datasets[0].refs 

866 ] 

867 all_refs = [] 

868 for dataset in datasets: 

869 refs = [] 

870 for ref in dataset.refs: 

871 # Create a dict from the dataId to drop the records. 

872 new_data_id = {str(k): v for k, v in ref.dataId.items()} 

873 new_ref = butler.registry.findDataset(ref.datasetType, new_data_id, collections=ref.run) 

874 assert new_ref is not None 

875 self.assertFalse(new_ref.dataId.hasRecords()) 

876 refs.append(new_ref) 

877 dataset.refs = refs 

878 all_refs.extend(dataset.refs) 

879 butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False) 

880 

881 # Use move mode to test that the file is deleted. Also 

882 # disable recording of file size. 

883 butler.ingest(*datasets, transfer="move", record_validation_info=False) 

884 

885 # Check that every ref now has records. 

886 for dataset in datasets: 

887 for ref in dataset.refs: 

888 self.assertTrue(ref.dataId.hasRecords()) 

889 

890 # Ensure that the file has disappeared. 

891 self.assertFalse(tempFile.exists()) 

892 

893 # Check that the datastore recorded no file size. 

894 # Not all datastores can support this. 

895 try: 

896 infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0]) # type: ignore[attr-defined] 

897 self.assertEqual(infos[0].file_size, -1) 

898 except AttributeError: 

899 pass 

900 

901 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424} 

902 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424} 

903 

904 multi1 = butler.get(datasetTypeName, dataId1) 

905 multi2 = butler.get(datasetTypeName, dataId2) 

906 

907 self.assertEqual(multi1, metrics1) 

908 self.assertEqual(multi2, metrics2) 

909 

910 # Compare URIs 

911 uri1 = butler.getURI(datasetTypeName, dataId1) 

912 uri2 = butler.getURI(datasetTypeName, dataId2) 

913 self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}") 

914 

915 # Test that removing one does not break the second 

916 # This line will issue a warning log message for a ChainedDatastore 

917 # that uses an InMemoryDatastore since in-memory can not ingest 

918 # files. 

919 butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False) 

920 self.assertFalse(butler.datasetExists(datasetTypeName, dataId1)) 

921 self.assertTrue(butler.datasetExists(datasetTypeName, dataId2)) 

922 multi2b = butler.get(datasetTypeName, dataId2) 

923 self.assertEqual(multi2, multi2b) 

924 

925 # Ensure we can ingest 0 datasets 

926 datasets = [] 

927 butler.ingest(*datasets) 

928 

929 def testPickle(self) -> None: 

930 """Test pickle support.""" 

931 butler = Butler(self.tmpConfigFile, run=self.default_run) 

932 butlerOut = pickle.loads(pickle.dumps(butler)) 

933 self.assertIsInstance(butlerOut, Butler) 

934 self.assertEqual(butlerOut._config, butler._config) 

935 self.assertEqual(butlerOut.collections, butler.collections) 

936 self.assertEqual(butlerOut.run, butler.run) 

937 

938 def testGetDatasetTypes(self) -> None: 

939 butler = Butler(self.tmpConfigFile, run=self.default_run) 

940 dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"]) 

941 dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [ 

942 ( 

943 "instrument", 

944 [ 

945 {"instrument": "DummyCam"}, 

946 {"instrument": "DummyHSC"}, 

947 {"instrument": "DummyCamComp"}, 

948 ], 

949 ), 

950 ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]), 

951 ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]), 

952 ] 

953 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

954 # Add needed Dimensions 

955 for element, data in dimensionEntries: 

956 butler.registry.insertDimensionData(element, *data) 

957 

958 # When a DatasetType is added to the registry entries are not created 

959 # for components but querying them can return the components. 

960 datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"} 

961 components = set() 

962 for datasetTypeName in datasetTypeNames: 

963 # Create and register a DatasetType 

964 self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

965 

966 for componentName in storageClass.components: 

967 components.add(DatasetType.nameWithComponent(datasetTypeName, componentName)) 

968 

969 fromRegistry: set[DatasetType] = set() 

970 for parent_dataset_type in butler.registry.queryDatasetTypes(): 

971 fromRegistry.add(parent_dataset_type) 

972 fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes()) 

973 self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components) 

974 

975 # Now that we have some dataset types registered, validate them 

976 butler.validateConfiguration( 

977 ignore=[ 

978 "test_metric_comp", 

979 "metric3", 

980 "metric5", 

981 "calexp", 

982 "DummySC", 

983 "datasetType.component", 

984 "random_data", 

985 "random_data_2", 

986 ] 

987 ) 

988 

989 # Add a new datasetType that will fail template validation 

990 self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry) 

991 if self.validationCanFail: 

992 with self.assertRaises(ValidationError): 

993 butler.validateConfiguration() 

994 

995 # Rerun validation but with a subset of dataset type names 

996 butler.validateConfiguration(datasetTypeNames=["metric4"]) 

997 

998 # Rerun validation but ignore the bad datasetType 

999 butler.validateConfiguration( 

1000 ignore=[ 

1001 "test_metric_comp", 

1002 "metric3", 

1003 "metric5", 

1004 "calexp", 

1005 "DummySC", 

1006 "datasetType.component", 

1007 "random_data", 

1008 "random_data_2", 

1009 ] 

1010 ) 

1011 

1012 def testTransaction(self) -> None: 

1013 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1014 datasetTypeName = "test_metric" 

1015 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

1016 dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = ( 

1017 ("instrument", {"instrument": "DummyCam"}), 

1018 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}), 

1019 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}), 

1020 ) 

1021 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1022 metric = makeExampleMetrics() 

1023 dataId = {"instrument": "DummyCam", "visit": 42} 

1024 # Create and register a DatasetType 

1025 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

1026 with self.assertRaises(TransactionTestError): 

1027 with butler.transaction(): 

1028 # Add needed Dimensions 

1029 for args in dimensionEntries: 

1030 butler.registry.insertDimensionData(*args) 

1031 # Store a dataset 

1032 ref = butler.put(metric, datasetTypeName, dataId) 

1033 self.assertIsInstance(ref, DatasetRef) 

1034 # Test getDirect 

1035 metricOut = butler.get(ref) 

1036 self.assertEqual(metric, metricOut) 

1037 # Test get 

1038 metricOut = butler.get(datasetTypeName, dataId) 

1039 self.assertEqual(metric, metricOut) 

1040 # Check we can get components 

1041 self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric) 

1042 raise TransactionTestError("This should roll back the entire transaction") 

1043 with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"): 

1044 butler.registry.expandDataId(dataId) 

1045 # Should raise LookupError for missing data ID value 

1046 with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"): 

1047 butler.get(datasetTypeName, dataId) 

1048 # Also check explicitly if Dataset entry is missing 

1049 self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections)) 

1050 # Direct retrieval should not find the file in the Datastore 

1051 with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"): 

1052 butler.get(ref) 

1053 

1054 def testMakeRepo(self) -> None: 

1055 """Test that we can write butler configuration to a new repository via 

1056 the Butler.makeRepo interface and then instantiate a butler from the 

1057 repo root. 

1058 """ 

1059 # Do not run the test if we know this datastore configuration does 

1060 # not support a file system root 

1061 if self.fullConfigKey is None: 

1062 return 

1063 

1064 # create two separate directories 

1065 root1 = tempfile.mkdtemp(dir=self.root) 

1066 root2 = tempfile.mkdtemp(dir=self.root) 

1067 

1068 butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile)) 

1069 limited = Config(self.configFile) 

1070 butler1 = Butler(butlerConfig) 

1071 butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile)) 

1072 full = Config(self.tmpConfigFile) 

1073 butler2 = Butler(butlerConfig) 

1074 # Butlers should have the same configuration regardless of whether 

1075 # defaults were expanded. 

1076 self.assertEqual(butler1._config, butler2._config) 

1077 # Config files loaded directly should not be the same. 

1078 self.assertNotEqual(limited, full) 

1079 # Make sure "limited" doesn't have a few keys we know it should be 

1080 # inheriting from defaults. 

1081 self.assertIn(self.fullConfigKey, full) 

1082 self.assertNotIn(self.fullConfigKey, limited) 

1083 

1084 # Collections don't appear until something is put in them 

1085 collections1 = set(butler1.registry.queryCollections()) 

1086 self.assertEqual(collections1, set()) 

1087 self.assertEqual(set(butler2.registry.queryCollections()), collections1) 

1088 

1089 # Check that a config with no associated file name will not 

1090 # work properly with relocatable Butler repo 

1091 butlerConfig.configFile = None 

1092 with self.assertRaises(ValueError): 

1093 Butler(butlerConfig) 

1094 

1095 with self.assertRaises(FileExistsError): 

1096 Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False) 

1097 

1098 def testStringification(self) -> None: 

1099 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1100 butlerStr = str(butler) 

1101 

1102 if self.datastoreStr is not None: 

1103 for testStr in self.datastoreStr: 

1104 self.assertIn(testStr, butlerStr) 

1105 if self.registryStr is not None: 

1106 self.assertIn(self.registryStr, butlerStr) 

1107 

1108 datastoreName = butler.datastore.name 

1109 if self.datastoreName is not None: 

1110 for testStr in self.datastoreName: 

1111 self.assertIn(testStr, datastoreName) 

1112 

1113 def testButlerRewriteDataId(self) -> None: 

1114 """Test that dataIds can be rewritten based on dimension records.""" 

1115 

1116 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1117 

1118 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1119 datasetTypeName = "random_data" 

1120 

1121 # Create dimension records. 

1122 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1123 butler.registry.insertDimensionData( 

1124 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1125 ) 

1126 butler.registry.insertDimensionData( 

1127 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

1128 ) 

1129 

1130 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"]) 

1131 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

1132 butler.registry.registerDatasetType(datasetType) 

1133 

1134 n_exposures = 5 

1135 dayobs = 20210530 

1136 

1137 for i in range(n_exposures): 

1138 butler.registry.insertDimensionData( 

1139 "exposure", 

1140 { 

1141 "instrument": "DummyCamComp", 

1142 "id": i, 

1143 "obs_id": f"exp{i}", 

1144 "seq_num": i, 

1145 "day_obs": dayobs, 

1146 "physical_filter": "d-r", 

1147 }, 

1148 ) 

1149 

1150 # Write some data. 

1151 for i in range(n_exposures): 

1152 metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]} 

1153 

1154 # Use the seq_num for the put to test rewriting. 

1155 dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

1156 ref = butler.put(metric, datasetTypeName, dataId=dataId) 

1157 

1158 # Check that the exposure is correct in the dataId 

1159 self.assertEqual(ref.dataId["exposure"], i) 

1160 

1161 # and check that we can get the dataset back with the same dataId 

1162 new_metric = butler.get(datasetTypeName, dataId=dataId) 

1163 self.assertEqual(new_metric, metric) 

1164 

1165 

1166class FileDatastoreButlerTests(ButlerTests): 

1167 """Common tests and specialization of ButlerTests for butlers backed 

1168 by datastores that inherit from FileDatastore. 

1169 """ 

1170 

1171 def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool: 

1172 """Checks if file exists at a given path (relative to root). 

1173 

1174 Test testPutTemplates verifies actual physical existance of the files 

1175 in the requested location. 

1176 """ 

1177 uri = ResourcePath(root, forceDirectory=True) 

1178 return uri.join(relpath).exists() 

1179 

1180 def testPutTemplates(self) -> None: 

1181 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1182 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1183 

1184 # Add needed Dimensions 

1185 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1186 butler.registry.insertDimensionData( 

1187 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1188 ) 

1189 butler.registry.insertDimensionData( 

1190 "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"} 

1191 ) 

1192 butler.registry.insertDimensionData( 

1193 "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"} 

1194 ) 

1195 

1196 # Create and store a dataset 

1197 metric = makeExampleMetrics() 

1198 

1199 # Create two almost-identical DatasetTypes (both will use default 

1200 # template) 

1201 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

1202 butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass)) 

1203 butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass)) 

1204 butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass)) 

1205 

1206 dataId1 = {"instrument": "DummyCamComp", "visit": 423} 

1207 dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"} 

1208 

1209 # Put with exactly the data ID keys needed 

1210 ref = butler.put(metric, "metric1", dataId1) 

1211 uri = butler.getURI(ref) 

1212 self.assertTrue(uri.exists()) 

1213 self.assertTrue( 

1214 uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle") 

1215 ) 

1216 

1217 # Check the template based on dimensions 

1218 if hasattr(butler.datastore, "templates"): 

1219 butler.datastore.templates.validateTemplates([ref]) 

1220 

1221 # Put with extra data ID keys (physical_filter is an optional 

1222 # dependency); should not change template (at least the way we're 

1223 # defining them to behave now; the important thing is that they 

1224 # must be consistent). 

1225 ref = butler.put(metric, "metric2", dataId2) 

1226 uri = butler.getURI(ref) 

1227 self.assertTrue(uri.exists()) 

1228 self.assertTrue( 

1229 uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle") 

1230 ) 

1231 

1232 # Check the template based on dimensions 

1233 if hasattr(butler.datastore, "templates"): 

1234 butler.datastore.templates.validateTemplates([ref]) 

1235 

1236 # Use a template that has a typo in dimension record metadata. 

1237 # Easier to test with a butler that has a ref with records attached. 

1238 template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits") 

1239 with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"): 

1240 path = template.format(ref) 

1241 self.assertEqual(path, f"a/v423/{ref.id}_fits") 

1242 

1243 template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits") 

1244 with self.assertRaises(KeyError): 

1245 with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"): 

1246 template.format(ref) 

1247 

1248 # Now use a file template that will not result in unique filenames 

1249 with self.assertRaises(FileTemplateValidationError): 

1250 butler.put(metric, "metric3", dataId1) 

1251 

1252 def testImportExport(self) -> None: 

1253 # Run put/get tests just to create and populate a repo. 

1254 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1255 self.runImportExportTest(storageClass) 

1256 

1257 @unittest.expectedFailure 

1258 def testImportExportVirtualComposite(self) -> None: 

1259 # Run put/get tests just to create and populate a repo. 

1260 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

1261 self.runImportExportTest(storageClass) 

1262 

1263 def runImportExportTest(self, storageClass: StorageClass) -> None: 

1264 """This test does an export to a temp directory and an import back 

1265 into a new temp directory repo. It does not assume a posix datastore""" 

1266 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1267 

1268 # Test that we must have a file extension. 

1269 with self.assertRaises(ValueError): 

1270 with exportButler.export(filename="dump", directory=".") as export: 

1271 pass 

1272 

1273 # Test that unknown format is not allowed. 

1274 with self.assertRaises(ValueError): 

1275 with exportButler.export(filename="dump.fits", directory=".") as export: 

1276 pass 

1277 

1278 # Test that the repo actually has at least one dataset. 

1279 datasets = list(exportButler.registry.queryDatasets(..., collections=Ellipsis)) 

1280 self.assertGreater(len(datasets), 0) 

1281 # Add a DimensionRecord that's unused by those datasets. 

1282 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")} 

1283 exportButler.registry.insertDimensionData("skymap", skymapRecord) 

1284 # Export and then import datasets. 

1285 with safeTestTempDir(TESTDIR) as exportDir: 

1286 exportFile = os.path.join(exportDir, "exports.yaml") 

1287 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export: 

1288 export.saveDatasets(datasets) 

1289 # Export the same datasets again. This should quietly do 

1290 # nothing because of internal deduplication, and it shouldn't 

1291 # complain about being asked to export the "htm7" elements even 

1292 # though there aren't any in these datasets or in the database. 

1293 export.saveDatasets(datasets, elements=["htm7"]) 

1294 # Save one of the data IDs again; this should be harmless 

1295 # because of internal deduplication. 

1296 export.saveDataIds([datasets[0].dataId]) 

1297 # Save some dimension records directly. 

1298 export.saveDimensionData("skymap", [skymapRecord]) 

1299 self.assertTrue(os.path.exists(exportFile)) 

1300 with safeTestTempDir(TESTDIR) as importDir: 

1301 # We always want this to be a local posix butler 

1302 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml"))) 

1303 # Calling script.butlerImport tests the implementation of the 

1304 # butler command line interface "import" subcommand. Functions 

1305 # in the script folder are generally considered protected and 

1306 # should not be used as public api. 

1307 with open(exportFile, "r") as f: 

1308 script.butlerImport( 

1309 importDir, 

1310 export_file=f, 

1311 directory=exportDir, 

1312 transfer="auto", 

1313 skip_dimensions=None, 

1314 ) 

1315 importButler = Butler(importDir, run=self.default_run) 

1316 for ref in datasets: 

1317 with self.subTest(ref=ref): 

1318 # Test for existence by passing in the DatasetType and 

1319 # data ID separately, to avoid lookup by dataset_id. 

1320 self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId)) 

1321 self.assertEqual( 

1322 list(importButler.registry.queryDimensionRecords("skymap")), 

1323 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)], 

1324 ) 

1325 

1326 def testRemoveRuns(self) -> None: 

1327 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1328 butler = Butler(self.tmpConfigFile, writeable=True) 

1329 # Load registry data with dimensions to hang datasets off of. 

1330 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1331 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1332 # Add some RUN-type collection. 

1333 run1 = "run1" 

1334 butler.registry.registerRun(run1) 

1335 run2 = "run2" 

1336 butler.registry.registerRun(run2) 

1337 # put a dataset in each 

1338 metric = makeExampleMetrics() 

1339 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1340 datasetType = self.addDatasetType( 

1341 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1342 ) 

1343 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1344 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1345 uri1 = butler.getURI(ref1, collections=[run1]) 

1346 uri2 = butler.getURI(ref2, collections=[run2]) 

1347 

1348 with self.assertRaises(OrphanedRecordError): 

1349 butler.registry.removeDatasetType(datasetType.name) 

1350 

1351 # Remove from both runs with different values for unstore. 

1352 butler.removeRuns([run1], unstore=True) 

1353 butler.removeRuns([run2], unstore=False) 

1354 # Should be nothing in registry for either one, and datastore should 

1355 # not think either exists. 

1356 with self.assertRaises(MissingCollectionError): 

1357 butler.registry.getCollectionType(run1) 

1358 with self.assertRaises(MissingCollectionError): 

1359 butler.registry.getCollectionType(run2) 

1360 self.assertFalse(butler.datastore.exists(ref1)) 

1361 self.assertFalse(butler.datastore.exists(ref2)) 

1362 # The ref we unstored should be gone according to the URI, but the 

1363 # one we forgot should still be around. 

1364 self.assertFalse(uri1.exists()) 

1365 self.assertTrue(uri2.exists()) 

1366 

1367 # Now that the collections have been pruned we can remove the 

1368 # dataset type 

1369 butler.registry.removeDatasetType(datasetType.name) 

1370 

1371 with self.assertLogs("lsst.daf.butler.registries", "INFO") as cm: 

1372 butler.registry.removeDatasetType(tuple(["test*", "test*"])) 

1373 self.assertIn("not defined", "\n".join(cm.output)) 

1374 

1375 

1376class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1377 """PosixDatastore specialization of a butler""" 

1378 

1379 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1380 fullConfigKey: str | None = ".datastore.formatters" 

1381 validationCanFail = True 

1382 datastoreStr = ["/tmp"] 

1383 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1384 registryStr = "/gen3.sqlite3" 

1385 

1386 def testPathConstructor(self) -> None: 

1387 """Independent test of constructor using PathLike.""" 

1388 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1389 self.assertIsInstance(butler, Butler) 

1390 

1391 # And again with a Path object with the butler yaml 

1392 path = pathlib.Path(self.tmpConfigFile) 

1393 butler = Butler(path, writeable=False) 

1394 self.assertIsInstance(butler, Butler) 

1395 

1396 # And again with a Path object without the butler yaml 

1397 # (making sure we skip it if the tmp config doesn't end 

1398 # in butler.yaml -- which is the case for a subclass) 

1399 if self.tmpConfigFile.endswith("butler.yaml"): 

1400 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1401 butler = Butler(path, writeable=False) 

1402 self.assertIsInstance(butler, Butler) 

1403 

1404 def testExportTransferCopy(self) -> None: 

1405 """Test local export using all transfer modes""" 

1406 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1407 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1408 # Test that the repo actually has at least one dataset. 

1409 datasets = list(exportButler.registry.queryDatasets(..., collections=Ellipsis)) 

1410 self.assertGreater(len(datasets), 0) 

1411 uris = [exportButler.getURI(d) for d in datasets] 

1412 assert isinstance(exportButler.datastore, FileDatastore) 

1413 datastoreRoot = exportButler.datastore.root 

1414 

1415 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1416 

1417 for path in pathsInStore: 

1418 # Assume local file system 

1419 assert path is not None 

1420 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1421 

1422 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1423 with safeTestTempDir(TESTDIR) as exportDir: 

1424 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1425 export.saveDatasets(datasets) 

1426 for path in pathsInStore: 

1427 assert path is not None 

1428 self.assertTrue( 

1429 self.checkFileExists(exportDir, path), 

1430 f"Check that mode {transfer} exported files", 

1431 ) 

1432 

1433 def testPruneDatasets(self) -> None: 

1434 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1435 butler = Butler(self.tmpConfigFile, writeable=True) 

1436 assert isinstance(butler.datastore, FileDatastore) 

1437 # Load registry data with dimensions to hang datasets off of. 

1438 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1439 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1440 # Add some RUN-type collections. 

1441 run1 = "run1" 

1442 butler.registry.registerRun(run1) 

1443 run2 = "run2" 

1444 butler.registry.registerRun(run2) 

1445 # put some datasets. ref1 and ref2 have the same data ID, and are in 

1446 # different runs. ref3 has a different data ID. 

1447 metric = makeExampleMetrics() 

1448 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1449 datasetType = self.addDatasetType( 

1450 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1451 ) 

1452 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1453 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1454 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1455 

1456 # Simple prune. 

1457 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1458 with self.assertRaises(LookupError): 

1459 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1460 

1461 # Put data back. 

1462 ref1 = butler.put(metric, ref1, run=run1) 

1463 ref2 = butler.put(metric, ref2, run=run2) 

1464 ref3 = butler.put(metric, ref3, run=run1) 

1465 

1466 # Check that in normal mode, deleting the record will lead to 

1467 # trash not touching the file. 

1468 uri1 = butler.datastore.getURI(ref1) 

1469 butler.datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1470 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1471 butler.datastore.trash(ref1) 

1472 butler.datastore.emptyTrash() 

1473 self.assertTrue(uri1.exists()) 

1474 uri1.remove() # Clean it up. 

1475 

1476 # Simulate execution butler setup by deleting the datastore 

1477 # record but keeping the file around and trusting. 

1478 butler.datastore.trustGetRequest = True 

1479 uri2 = butler.datastore.getURI(ref2) 

1480 uri3 = butler.datastore.getURI(ref3) 

1481 self.assertTrue(uri2.exists()) 

1482 self.assertTrue(uri3.exists()) 

1483 

1484 # Remove the datastore record. 

1485 butler.datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1486 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1487 self.assertTrue(uri2.exists()) 

1488 butler.datastore.trash([ref2, ref3]) 

1489 # Immediate removal for ref2 file 

1490 self.assertFalse(uri2.exists()) 

1491 # But ref3 has to wait for the empty. 

1492 self.assertTrue(uri3.exists()) 

1493 butler.datastore.emptyTrash() 

1494 self.assertFalse(uri3.exists()) 

1495 

1496 # Clear out the datasets from registry. 

1497 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1498 

1499 def testPytypeCoercion(self) -> None: 

1500 """Test python type coercion on Butler.get and put.""" 

1501 

1502 # Store some data with the normal example storage class. 

1503 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1504 datasetTypeName = "test_metric" 

1505 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1506 

1507 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1508 metric = butler.get(datasetTypeName, dataId=dataId) 

1509 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1510 

1511 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1512 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1513 

1514 # Now need to hack the registry dataset type definition. 

1515 # There is no API for this. 

1516 assert isinstance(butler.registry, SqlRegistry) 

1517 manager = butler.registry._managers.datasets 

1518 assert hasattr(manager, "_db") and hasattr(manager, "_static") 

1519 manager._db.update( 

1520 manager._static.dataset_type, 

1521 {"name": datasetTypeName}, 

1522 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1523 ) 

1524 

1525 # Force reset of dataset type cache 

1526 butler.registry.refresh() 

1527 

1528 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1529 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1530 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1531 

1532 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1533 self.assertNotEqual(type(metric_model), type(metric)) 

1534 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1535 

1536 # Put the model and read it back to show that everything now 

1537 # works as normal. 

1538 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1539 metric_model_new = butler.get(metric_ref) 

1540 self.assertEqual(metric_model_new, metric_model) 

1541 

1542 # Hack the storage class again to something that will fail on the 

1543 # get with no conversion class. 

1544 manager._db.update( 

1545 manager._static.dataset_type, 

1546 {"name": datasetTypeName}, 

1547 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1548 ) 

1549 butler.registry.refresh() 

1550 

1551 with self.assertRaises(ValueError): 

1552 butler.get(datasetTypeName, dataId=dataId) 

1553 

1554 

1555@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1556class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1557 """PosixDatastore specialization of a butler using Postgres""" 

1558 

1559 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1560 fullConfigKey = ".datastore.formatters" 

1561 validationCanFail = True 

1562 datastoreStr = ["/tmp"] 

1563 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1564 registryStr = "PostgreSQL@test" 

1565 postgresql: Any 

1566 

1567 @staticmethod 

1568 def _handler(postgresql: Any) -> None: 

1569 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1570 with engine.begin() as connection: 

1571 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1572 

1573 @classmethod 

1574 def setUpClass(cls) -> None: 

1575 # Create the postgres test server. 

1576 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1577 cache_initialized_db=True, on_initialized=cls._handler 

1578 ) 

1579 super().setUpClass() 

1580 

1581 @classmethod 

1582 def tearDownClass(cls) -> None: 

1583 # Clean up any lingering SQLAlchemy engines/connections 

1584 # so they're closed before we shut down the server. 

1585 gc.collect() 

1586 cls.postgresql.clear_cache() 

1587 super().tearDownClass() 

1588 

1589 def setUp(self) -> None: 

1590 self.server = self.postgresql() 

1591 

1592 # Need to add a registry section to the config. 

1593 self._temp_config = False 

1594 config = Config(self.configFile) 

1595 config["registry", "db"] = self.server.url() 

1596 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1597 config.dump(fh) 

1598 self.configFile = fh.name 

1599 self._temp_config = True 

1600 super().setUp() 

1601 

1602 def tearDown(self) -> None: 

1603 self.server.stop() 

1604 if self._temp_config and os.path.exists(self.configFile): 

1605 os.remove(self.configFile) 

1606 super().tearDown() 

1607 

1608 def testMakeRepo(self) -> None: 

1609 # The base class test assumes that it's using sqlite and assumes 

1610 # the config file is acceptable to sqlite. 

1611 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1612 

1613 

1614class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1615 """InMemoryDatastore specialization of a butler""" 

1616 

1617 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1618 fullConfigKey = None 

1619 useTempRoot = False 

1620 validationCanFail = False 

1621 datastoreStr = ["datastore='InMemory"] 

1622 datastoreName = ["InMemoryDatastore@"] 

1623 registryStr = "/gen3.sqlite3" 

1624 

1625 def testIngest(self) -> None: 

1626 pass 

1627 

1628 

1629class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1630 """PosixDatastore specialization""" 

1631 

1632 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1633 fullConfigKey = ".datastore.datastores.1.formatters" 

1634 validationCanFail = True 

1635 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1636 datastoreName = [ 

1637 "InMemoryDatastore@", 

1638 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1639 "SecondDatastore", 

1640 ] 

1641 registryStr = "/gen3.sqlite3" 

1642 

1643 

1644class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1645 """Test that a yaml file in one location can refer to a root in another.""" 

1646 

1647 datastoreStr = ["dir1"] 

1648 # Disable the makeRepo test since we are deliberately not using 

1649 # butler.yaml as the config name. 

1650 fullConfigKey = None 

1651 

1652 def setUp(self) -> None: 

1653 self.root = makeTestTempDir(TESTDIR) 

1654 

1655 # Make a new repository in one place 

1656 self.dir1 = os.path.join(self.root, "dir1") 

1657 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1658 

1659 # Move the yaml file to a different place and add a "root" 

1660 self.dir2 = os.path.join(self.root, "dir2") 

1661 os.makedirs(self.dir2, exist_ok=True) 

1662 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1663 config = Config(configFile1) 

1664 config["root"] = self.dir1 

1665 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1666 config.dumpToUri(configFile2) 

1667 os.remove(configFile1) 

1668 self.tmpConfigFile = configFile2 

1669 

1670 def testFileLocations(self) -> None: 

1671 self.assertNotEqual(self.dir1, self.dir2) 

1672 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1673 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1674 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1675 

1676 

1677class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1678 """Test that a config file created by makeRepo outside of repo works.""" 

1679 

1680 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1681 

1682 def setUp(self) -> None: 

1683 self.root = makeTestTempDir(TESTDIR) 

1684 self.root2 = makeTestTempDir(TESTDIR) 

1685 

1686 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1687 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1688 

1689 def tearDown(self) -> None: 

1690 if os.path.exists(self.root2): 

1691 shutil.rmtree(self.root2, ignore_errors=True) 

1692 super().tearDown() 

1693 

1694 def testConfigExistence(self) -> None: 

1695 c = Config(self.tmpConfigFile) 

1696 uri_config = ResourcePath(c["root"]) 

1697 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1698 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1699 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1700 

1701 def testPutGet(self) -> None: 

1702 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1703 self.runPutGetTest(storageClass, "test_metric") 

1704 

1705 

1706class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1707 """Test that a config file created by makeRepo outside of repo works.""" 

1708 

1709 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1710 

1711 def setUp(self) -> None: 

1712 self.root = makeTestTempDir(TESTDIR) 

1713 self.root2 = makeTestTempDir(TESTDIR) 

1714 

1715 self.tmpConfigFile = self.root2 

1716 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1717 

1718 def testConfigExistence(self) -> None: 

1719 # Append the yaml file else Config constructor does not know the file 

1720 # type. 

1721 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1722 super().testConfigExistence() 

1723 

1724 

1725class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1726 """Test that a config file created by makeRepo outside of repo works.""" 

1727 

1728 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1729 

1730 def setUp(self) -> None: 

1731 self.root = makeTestTempDir(TESTDIR) 

1732 self.root2 = makeTestTempDir(TESTDIR) 

1733 

1734 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1735 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1736 

1737 

1738@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1739class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1740 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1741 a local in-memory SqlRegistry. 

1742 """ 

1743 

1744 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1745 fullConfigKey = None 

1746 validationCanFail = True 

1747 

1748 bucketName = "anybucketname" 

1749 """Name of the Bucket that will be used in the tests. The name is read from 

1750 the config file used with the tests during set-up. 

1751 """ 

1752 

1753 root = "butlerRoot/" 

1754 """Root repository directory expected to be used in case useTempRoot=False. 

1755 Otherwise the root is set to a 20 characters long randomly generated string 

1756 during set-up. 

1757 """ 

1758 

1759 datastoreStr = [f"datastore={root}"] 

1760 """Contains all expected root locations in a format expected to be 

1761 returned by Butler stringification. 

1762 """ 

1763 

1764 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1765 """The expected format of the S3 Datastore string.""" 

1766 

1767 registryStr = "/gen3.sqlite3" 

1768 """Expected format of the Registry string.""" 

1769 

1770 mock_s3 = mock_s3() 

1771 """The mocked s3 interface from moto.""" 

1772 

1773 def genRoot(self) -> str: 

1774 """Returns a random string of len 20 to serve as a root 

1775 name for the temporary bucket repo. 

1776 

1777 This is equivalent to tempfile.mkdtemp as this is what self.root 

1778 becomes when useTempRoot is True. 

1779 """ 

1780 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1781 return rndstr + "/" 

1782 

1783 def setUp(self) -> None: 

1784 config = Config(self.configFile) 

1785 uri = ResourcePath(config[".datastore.datastore.root"]) 

1786 self.bucketName = uri.netloc 

1787 

1788 # Enable S3 mocking of tests. 

1789 self.mock_s3.start() 

1790 

1791 # set up some fake credentials if they do not exist 

1792 self.usingDummyCredentials = setAwsEnvCredentials() 

1793 

1794 if self.useTempRoot: 

1795 self.root = self.genRoot() 

1796 rooturi = f"s3://{self.bucketName}/{self.root}" 

1797 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1798 

1799 # need local folder to store registry database 

1800 self.reg_dir = makeTestTempDir(TESTDIR) 

1801 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1802 

1803 # MOTO needs to know that we expect Bucket bucketname to exist 

1804 # (this used to be the class attribute bucketName) 

1805 s3 = boto3.resource("s3") 

1806 s3.create_bucket(Bucket=self.bucketName) 

1807 

1808 self.datastoreStr = [f"datastore='{rooturi}'"] 

1809 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1810 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1811 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1812 

1813 def tearDown(self) -> None: 

1814 s3 = boto3.resource("s3") 

1815 bucket = s3.Bucket(self.bucketName) 

1816 try: 

1817 bucket.objects.all().delete() 

1818 except botocore.exceptions.ClientError as e: 

1819 if e.response["Error"]["Code"] == "404": 

1820 # the key was not reachable - pass 

1821 pass 

1822 else: 

1823 raise 

1824 

1825 bucket = s3.Bucket(self.bucketName) 

1826 bucket.delete() 

1827 

1828 # Stop the S3 mock. 

1829 self.mock_s3.stop() 

1830 

1831 # unset any potentially set dummy credentials 

1832 if self.usingDummyCredentials: 

1833 unsetAwsEnvCredentials() 

1834 

1835 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1836 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1837 

1838 if self.useTempRoot and os.path.exists(self.root): 

1839 shutil.rmtree(self.root, ignore_errors=True) 

1840 

1841 super().tearDown() 

1842 

1843 

1844class PosixDatastoreTransfers(unittest.TestCase): 

1845 """Test data transfers between butlers. 

1846 

1847 Test for different managers. UUID to UUID and integer to integer are 

1848 tested. UUID to integer is not supported since we do not currently 

1849 want to allow that. Integer to UUID is supported with the caveat 

1850 that UUID4 will be generated and this will be incorrect for raw 

1851 dataset types. The test ignores that. 

1852 """ 

1853 

1854 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1855 storageClassFactory: StorageClassFactory 

1856 

1857 @classmethod 

1858 def setUpClass(cls) -> None: 

1859 cls.storageClassFactory = StorageClassFactory() 

1860 cls.storageClassFactory.addFromConfig(cls.configFile) 

1861 

1862 def setUp(self) -> None: 

1863 self.root = makeTestTempDir(TESTDIR) 

1864 self.config = Config(self.configFile) 

1865 

1866 def tearDown(self) -> None: 

1867 removeTestTempDir(self.root) 

1868 

1869 def create_butler(self, manager: str, label: str) -> Butler: 

1870 config = Config(self.configFile) 

1871 config["registry", "managers", "datasets"] = manager 

1872 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

1873 

1874 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None: 

1875 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID" 

1876 if manager1 is None: 

1877 manager1 = default 

1878 if manager2 is None: 

1879 manager2 = default 

1880 self.source_butler = self.create_butler(manager1, "1") 

1881 self.target_butler = self.create_butler(manager2, "2") 

1882 

1883 def testTransferUuidToUuid(self) -> None: 

1884 self.create_butlers() 

1885 self.assertButlerTransfers() 

1886 

1887 def _enable_trust(self, datastore: Datastore) -> None: 

1888 if hasattr(datastore, "trustGetRequest"): 

1889 datastore.trustGetRequest = True 

1890 elif hasattr(datastore, "datastores"): 

1891 for datastore in datastore.datastores: 

1892 if hasattr(datastore, "trustGetRequest"): 

1893 datastore.trustGetRequest = True 

1894 

1895 def testTransferMissing(self) -> None: 

1896 """Test transfers where datastore records are missing. 

1897 

1898 This is how execution butler works. 

1899 """ 

1900 self.create_butlers() 

1901 

1902 # Configure the source butler to allow trust. 

1903 self._enable_trust(self.source_butler.datastore) 

1904 

1905 self.assertButlerTransfers(purge=True) 

1906 

1907 def testTransferMissingDisassembly(self) -> None: 

1908 """Test transfers where datastore records are missing. 

1909 

1910 This is how execution butler works. 

1911 """ 

1912 self.create_butlers() 

1913 

1914 # Configure the source butler to allow trust. 

1915 self._enable_trust(self.source_butler.datastore) 

1916 

1917 # Test disassembly. 

1918 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

1919 

1920 def testAbsoluteURITransferDirect(self) -> None: 

1921 """Test transfer using an absolute URI.""" 

1922 self._absolute_transfer("auto") 

1923 

1924 def testAbsoluteURITransferCopy(self) -> None: 

1925 """Test transfer using an absolute URI.""" 

1926 self._absolute_transfer("copy") 

1927 

1928 def _absolute_transfer(self, transfer: str) -> None: 

1929 self.create_butlers() 

1930 

1931 storageClassName = "StructuredData" 

1932 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1933 datasetTypeName = "random_data" 

1934 run = "run1" 

1935 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1936 

1937 dimensions = self.source_butler.registry.dimensions.extract(()) 

1938 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

1939 self.source_butler.registry.registerDatasetType(datasetType) 

1940 

1941 metrics = makeExampleMetrics() 

1942 with ResourcePath.temporary_uri(suffix=".json") as temp: 

1943 dataId = DataCoordinate.makeEmpty(self.source_butler.dimensions) 

1944 source_refs = [DatasetRef(datasetType, dataId, run=run)] 

1945 temp.write(json.dumps(metrics.exportAsDict()).encode()) 

1946 dataset = FileDataset(path=temp, refs=source_refs) 

1947 self.source_butler.ingest(dataset, transfer="direct") 

1948 

1949 self.target_butler.transfer_from( 

1950 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer 

1951 ) 

1952 

1953 uri = self.target_butler.getURI(dataset.refs[0]) 

1954 if transfer == "auto": 

1955 self.assertEqual(uri, temp) 

1956 else: 

1957 self.assertNotEqual(uri, temp) 

1958 

1959 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None: 

1960 """Test that a run can be transferred to another butler.""" 

1961 

1962 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1963 datasetTypeName = "random_data" 

1964 

1965 # Test will create 3 collections and we will want to transfer 

1966 # two of those three. 

1967 runs = ["run1", "run2", "other"] 

1968 

1969 # Also want to use two different dataset types to ensure that 

1970 # grouping works. 

1971 datasetTypeNames = ["random_data", "random_data_2"] 

1972 

1973 # Create the run collections in the source butler. 

1974 for run in runs: 

1975 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1976 

1977 # Create dimensions in source butler. 

1978 n_exposures = 30 

1979 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1980 self.source_butler.registry.insertDimensionData( 

1981 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1982 ) 

1983 self.source_butler.registry.insertDimensionData( 

1984 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

1985 ) 

1986 

1987 for i in range(n_exposures): 

1988 self.source_butler.registry.insertDimensionData( 

1989 "exposure", 

1990 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

1991 ) 

1992 

1993 # Create dataset types in the source butler. 

1994 dimensions = self.source_butler.registry.dimensions.extract(["instrument", "exposure"]) 

1995 for datasetTypeName in datasetTypeNames: 

1996 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

1997 self.source_butler.registry.registerDatasetType(datasetType) 

1998 

1999 # Write a dataset to an unrelated run -- this will ensure that 

2000 # we are rewriting integer dataset ids in the target if necessary. 

2001 # Will not be relevant for UUID. 

2002 run = "distraction" 

2003 butler = Butler(butler=self.source_butler, run=run) 

2004 butler.put( 

2005 makeExampleMetrics(), 

2006 datasetTypeName, 

2007 exposure=1, 

2008 instrument="DummyCamComp", 

2009 physical_filter="d-r", 

2010 ) 

2011 

2012 # Write some example metrics to the source 

2013 butler = Butler(butler=self.source_butler) 

2014 

2015 # Set of DatasetRefs that should be in the list of refs to transfer 

2016 # but which will not be transferred. 

2017 deleted: set[DatasetRef] = set() 

2018 

2019 n_expected = 20 # Number of datasets expected to be transferred 

2020 source_refs = [] 

2021 for i in range(n_exposures): 

2022 # Put a third of datasets into each collection, only retain 

2023 # two thirds. 

2024 index = i % 3 

2025 run = runs[index] 

2026 datasetTypeName = datasetTypeNames[i % 2] 

2027 

2028 metric = MetricsExample( 

2029 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)] 

2030 ) 

2031 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2032 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2033 

2034 # Remove the datastore record using low-level API 

2035 if purge: 

2036 # Remove records for a fraction. 

2037 if index == 1: 

2038 # For one of these delete the file as well. 

2039 # This allows the "missing" code to filter the 

2040 # file out. 

2041 # Access the individual datastores. 

2042 datastores = [] 

2043 if hasattr(butler.datastore, "datastores"): 

2044 datastores.extend(butler.datastore.datastores) 

2045 else: 

2046 datastores.append(butler.datastore) 

2047 

2048 if not deleted: 

2049 # For a chained datastore we need to remove 

2050 # files in each chain. 

2051 for datastore in datastores: 

2052 # The file might not be known to the datastore 

2053 # if constraints are used. 

2054 try: 

2055 primary, uris = datastore.getURIs(ref) 

2056 except FileNotFoundError: 

2057 continue 

2058 if primary: 

2059 if primary.scheme != "mem": 

2060 primary.remove() 

2061 for uri in uris.values(): 

2062 if uri.scheme != "mem": 

2063 uri.remove() 

2064 n_expected -= 1 

2065 deleted.add(ref) 

2066 

2067 # Remove the datastore record. 

2068 for datastore in datastores: 

2069 if hasattr(datastore, "removeStoredItemInfo"): 

2070 datastore.removeStoredItemInfo(ref) 

2071 

2072 if index < 2: 

2073 source_refs.append(ref) 

2074 if ref not in deleted: 

2075 new_metric = butler.get(ref) 

2076 self.assertEqual(new_metric, metric) 

2077 

2078 # Create some bad dataset types to ensure we check for inconsistent 

2079 # definitions. 

2080 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2081 for datasetTypeName in datasetTypeNames: 

2082 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2083 self.target_butler.registry.registerDatasetType(datasetType) 

2084 with self.assertRaises(ConflictingDefinitionError) as cm: 

2085 self.target_butler.transfer_from(self.source_butler, source_refs) 

2086 self.assertIn("dataset type differs", str(cm.exception)) 

2087 

2088 # And remove the bad definitions. 

2089 for datasetTypeName in datasetTypeNames: 

2090 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2091 

2092 # Transfer without creating dataset types should fail. 

2093 with self.assertRaises(KeyError): 

2094 self.target_butler.transfer_from(self.source_butler, source_refs) 

2095 

2096 # Transfer without creating dimensions should fail. 

2097 with self.assertRaises(ConflictingDefinitionError) as cm: 

2098 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True) 

2099 self.assertIn("dimension", str(cm.exception)) 

2100 

2101 # The failed transfer above leaves registry in an inconsistent 

2102 # state because the run is created but then rolled back without 

2103 # the collection cache being cleared. For now force a refresh. 

2104 # Can remove with DM-35498. 

2105 self.target_butler.registry.refresh() 

2106 

2107 # Now transfer them to the second butler, including dimensions. 

2108 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2109 transferred = self.target_butler.transfer_from( 

2110 self.source_butler, 

2111 source_refs, 

2112 register_dataset_types=True, 

2113 transfer_dimensions=True, 

2114 ) 

2115 self.assertEqual(len(transferred), n_expected) 

2116 log_output = ";".join(log_cm.output) 

2117 

2118 # A ChainedDatastore will use the in-memory datastore for mexists 

2119 # so we can not rely on the mexists log message. 

2120 self.assertIn("Number of datastore records found in source", log_output) 

2121 self.assertIn("Creating output run", log_output) 

2122 

2123 # Do the transfer twice to ensure that it will do nothing extra. 

2124 # Only do this if purge=True because it does not work for int 

2125 # dataset_id. 

2126 if purge: 

2127 # This should not need to register dataset types. 

2128 transferred = self.target_butler.transfer_from(self.source_butler, source_refs) 

2129 self.assertEqual(len(transferred), n_expected) 

2130 

2131 # Also do an explicit low-level transfer to trigger some 

2132 # edge cases. 

2133 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2134 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2135 log_output = ";".join(log_cm.output) 

2136 self.assertIn("no file artifacts exist", log_output) 

2137 

2138 with self.assertRaises((TypeError, AttributeError)): 

2139 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) # type: ignore 

2140 

2141 with self.assertRaises(ValueError): 

2142 self.target_butler.datastore.transfer_from( 

2143 self.source_butler.datastore, source_refs, transfer="split" 

2144 ) 

2145 

2146 # Now try to get the same refs from the new butler. 

2147 for ref in source_refs: 

2148 if ref not in deleted: 

2149 new_metric = self.target_butler.get(ref) 

2150 old_metric = self.source_butler.get(ref) 

2151 self.assertEqual(new_metric, old_metric) 

2152 

2153 # Now prune run2 collection and create instead a CHAINED collection. 

2154 # This should block the transfer. 

2155 self.target_butler.removeRuns(["run2"], unstore=True) 

2156 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2157 with self.assertRaises(CollectionTypeError): 

2158 # Re-importing the run1 datasets can be problematic if they 

2159 # use integer IDs so filter those out. 

2160 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2161 self.target_butler.transfer_from(self.source_butler, to_transfer) 

2162 

2163 

2164class ChainedDatastoreTransfers(PosixDatastoreTransfers): 

2165 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2166 

2167 

2168if __name__ == "__main__": 

2169 unittest.main()