Coverage for tests/test_butler.py: 15%

1413 statements  

coverage.py v7.4.3, created at 2024-03-07 11:04 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28"""Tests for Butler. 

29""" 

30from __future__ import annotations 

31 

32import gc 

33import json 

34import logging 

35import os 

36import pathlib 

37import pickle 

38import posixpath 

39import random 

40import shutil 

41import string 

42import tempfile 

43import unittest 

44import uuid 

45from collections.abc import Mapping 

46from typing import TYPE_CHECKING, Any, cast 

47 

48try: 

49 import boto3 

50 import botocore 

51 from lsst.resources.s3utils import clean_test_environment_for_s3 

52 

53 try: 

54 from moto import mock_aws # v5 

55 except ImportError: 

56 from moto import mock_s3 as mock_aws 

57except ImportError: 

58 boto3 = None 

59 

60 def mock_aws(*args: Any, **kwargs: Any) -> Any: # type: ignore[no-untyped-def] 

61 """No-op decorator in case moto mock_aws can not be imported.""" 

62 return None 

63 

64 
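# A hedged sketch (not part of the original suite) of how this fallback is
# typically consumed: the no-op mock_aws above keeps module import working,
# and individual test classes skip themselves when boto3 is missing, e.g.
#
#   @unittest.skipIf(boto3 is None, "boto3/moto not installed")
#   @mock_aws
#   class S3ButlerTests(unittest.TestCase):
#       ...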

65try: 

66 from lsst.daf.butler.tests.server import create_test_server 

67except ImportError: 

68 create_test_server = None 

69 

70try: 

71 # It's possible but silly to have testing.postgresql installed without 

72 # having the postgresql server installed (because then nothing in 

73 # testing.postgresql would work), so we use the presence of that module 

74 # to test whether we can expect the server to be available. 

75 import testing.postgresql # type: ignore[import] 

76except ImportError: 

77 testing = None 

78 

79import astropy.time 

80import sqlalchemy 

81from lsst.daf.butler import ( 

82 Butler, 

83 ButlerConfig, 

84 ButlerRepoIndex, 

85 CollectionType, 

86 Config, 

87 DataCoordinate, 

88 DatasetExistence, 

89 DatasetRef, 

90 DatasetType, 

91 FileDataset, 

92 NoDefaultCollectionError, 

93 StorageClassFactory, 

94 ValidationError, 

95 script, 

96) 

97from lsst.daf.butler.datastore import NullDatastore 

98from lsst.daf.butler.datastore.file_templates import FileTemplate, FileTemplateValidationError 

99from lsst.daf.butler.datastores.fileDatastore import FileDatastore 

100from lsst.daf.butler.direct_butler import DirectButler 

101from lsst.daf.butler.registry import ( 

102 CollectionError, 

103 CollectionTypeError, 

104 ConflictingDefinitionError, 

105 DataIdValueError, 

106 MissingCollectionError, 

107 OrphanedRecordError, 

108) 

109from lsst.daf.butler.registry.sql_registry import SqlRegistry 

110from lsst.daf.butler.repo_relocation import BUTLER_ROOT_TAG 

111from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter 

112from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir 

113from lsst.resources import ResourcePath 

114from lsst.utils import doImportType 

115from lsst.utils.introspection import get_full_type_name 

116 

117if TYPE_CHECKING: 

118 import types 

119 

120 from lsst.daf.butler import DimensionGroup, Registry, StorageClass 

121 

122TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

123 

124 

125def clean_environment() -> None: 

126 """Remove external environment variables that affect the tests.""" 

127 for k in ("DAF_BUTLER_REPOSITORY_INDEX",): 

128 os.environ.pop(k, None) 

129 

130 

131def makeExampleMetrics() -> MetricsExample: 

132 """Return example dataset suitable for tests.""" 

133 return MetricsExample( 

134 {"AM1": 5.2, "AM2": 30.6}, 

135 {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}}, 

136 [563, 234, 456.7, 752, 8, 9, 27], 

137 ) 

138 

139 

140class TransactionTestError(Exception): 

141 """Specific error for testing transactions, to prevent misdiagnosing 

142 that might otherwise occur when a standard exception is used. 

143 """ 

144 

145 pass 

146 

147 

148class ButlerConfigTests(unittest.TestCase): 

149 """Simple tests for ButlerConfig that are not tested in any other test 

150 cases. 

151 """ 

152 

153 def testSearchPath(self) -> None: 

154 configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml") 

155 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm: 

156 config1 = ButlerConfig(configFile) 

157 self.assertNotIn("testConfigs", "\n".join(cm.output)) 

158 

159 overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs") 

160 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm: 

161 config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory]) 

162 self.assertIn("testConfigs", "\n".join(cm.output)) 

163 

164 key = ("datastore", "records", "table") 

165 self.assertNotEqual(config1[key], config2[key]) 

166 self.assertEqual(config2[key], "override_record") 

167 
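    # A sketch of the override mechanism exercised above (the exact file
    # contents are an assumption; only the merged key is asserted): the
    # testConfigs directory would carry a fragment along the lines of
    #
    #   datastore:
    #     records:
    #       table: override_record
    #
    # which ButlerConfig merges ahead of the defaults when the directory is
    # supplied via searchPaths.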

168 

169class ButlerPutGetTests(TestCaseMixin): 

170 """Helper method for running a suite of put/get tests from different 

171 butler configurations. 

172 """ 

173 

174 root: str 

175 default_run = "ingésτ😺" 

176 storageClassFactory: StorageClassFactory 

177 configFile: str | None 

178 tmpConfigFile: str 

179 

180 @staticmethod 

181 def addDatasetType( 

182 datasetTypeName: str, dimensions: DimensionGroup, storageClass: StorageClass | str, registry: Registry 

183 ) -> DatasetType: 

184 """Create a DatasetType and register it""" 

185 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

186 registry.registerDatasetType(datasetType) 

187 return datasetType 

188 

189 @classmethod 

190 def setUpClass(cls) -> None: 

191 cls.storageClassFactory = StorageClassFactory() 

192 if cls.configFile is not None: 

193 cls.storageClassFactory.addFromConfig(cls.configFile) 

194 

195 def assertGetComponents( 

196 self, 

197 butler: Butler, 

198 datasetRef: DatasetRef, 

199 components: tuple[str, ...], 

200 reference: Any, 

201 collections: Any = None, 

202 ) -> None: 

203 datasetType = datasetRef.datasetType 

204 dataId = datasetRef.dataId 

205 deferred = butler.getDeferred(datasetRef) 

206 

207 for component in components: 

208 compTypeName = datasetType.componentTypeName(component) 

209 result = butler.get(compTypeName, dataId, collections=collections) 

210 self.assertEqual(result, getattr(reference, component)) 

211 result_deferred = deferred.get(component=component) 

212 self.assertEqual(result_deferred, result) 

213 
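    # For orientation: componentTypeName joins parent and component with a
    # dot, so a dataset type named "test_metric" yields "test_metric.summary",
    # which is the string form butler.get() accepts above.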

214 def tearDown(self) -> None: 

215 if self.root is not None: 

216 removeTestTempDir(self.root) 

217 

218 def create_empty_butler(self, run: str | None = None, writeable: bool | None = None) -> Butler:

219 """Create a Butler for the test repository, without inserting test 

220 data. 

221 """ 

222 butler = Butler.from_config(self.tmpConfigFile, run=run, writeable=writeable) 

223 assert isinstance(butler, DirectButler), "Expect DirectButler in configuration" 

224 return butler 

225 

226 def create_butler( 

227 self, run: str, storageClass: StorageClass | str, datasetTypeName: str 

228 ) -> tuple[Butler, DatasetType]: 

229 """Create a Butler for the test repository and insert some test data 

230 into it. 

231 """ 

232 butler = self.create_empty_butler(run=run) 

233 

234 collections = set(butler.registry.queryCollections()) 

235 self.assertEqual(collections, {run}) 

236 # Create and register a DatasetType 

237 dimensions = butler.dimensions.conform(["instrument", "visit"]) 

238 

239 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

240 

241 # Add needed Dimensions 

242 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

243 butler.registry.insertDimensionData( 

244 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

245 ) 

246 butler.registry.insertDimensionData( 

247 "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"} 

248 ) 

249 butler.registry.insertDimensionData("day_obs", {"instrument": "DummyCamComp", "id": 20200101}) 

250 visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai") 

251 visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai") 

252 butler.registry.insertDimensionData( 

253 "visit", 

254 { 

255 "instrument": "DummyCamComp", 

256 "id": 423, 

257 "name": "fourtwentythree", 

258 "physical_filter": "d-r", 

259 "datetime_begin": visit_start, 

260 "datetime_end": visit_end, 

261 "day_obs": 20200101, 

262 }, 

263 ) 

264 

265 # Add more visits for some later tests 

266 for visit_id in (424, 425): 

267 butler.registry.insertDimensionData( 

268 "visit", 

269 { 

270 "instrument": "DummyCamComp", 

271 "id": visit_id, 

272 "name": f"fourtwentyfour_{visit_id}", 

273 "physical_filter": "d-r", 

274 "day_obs": 20200101, 

275 }, 

276 ) 

277 return butler, datasetType 

278 
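    # Fixture shape, for orientation: one instrument (DummyCamComp), one
    # physical_filter (d-r), one day_obs, and visits 423-425 -- the minimal
    # dimension graph needed to resolve {"instrument", "visit"} data IDs in
    # the tests below.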

279 def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Butler: 

280 # New datasets will be added to run and tag, but we will only look in 

281 # tag when looking up datasets. 

282 run = self.default_run 

283 butler, datasetType = self.create_butler(run, storageClass, datasetTypeName) 

284 assert butler.run is not None 

285 

286 # Create and store a dataset 

287 metric = makeExampleMetrics() 

288 dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423}) 

289 

290 # Put and remove the dataset once as a DatasetRef, once as a dataId, 

291 # and once with a DatasetType 

292 

293 # Keep track of any collections we add and do not clean up 

294 expected_collections = {run} 

295 

296 counter = 0 

297 ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1") 

298 args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]

299 for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)): 

300 # Since we are using subTest we can get cascading failures 

301 # here with the first attempt failing and the others failing 

302 # immediately because the dataset already exists. Work around 

303 # this by using a distinct run collection each time.

304 counter += 1 

305 this_run = f"put_run_{counter}" 

306 butler.registry.registerCollection(this_run, type=CollectionType.RUN) 

307 expected_collections.update({this_run}) 

308 

309 with self.subTest(args=args): 

310 kwargs: dict[str, Any] = {} 

311 if not isinstance(args[0], DatasetRef): # type: ignore 

312 kwargs["run"] = this_run 

313 ref = butler.put(metric, *args, **kwargs) 

314 self.assertIsInstance(ref, DatasetRef) 

315 

316 # Test get of a ref. 

317 metricOut = butler.get(ref) 

318 self.assertEqual(metric, metricOut) 

319 # Test get 

320 metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run) 

321 self.assertEqual(metric, metricOut) 

322 # Test get with a datasetRef 

323 metricOut = butler.get(ref) 

324 self.assertEqual(metric, metricOut) 

325 # Test getDeferred with dataId 

326 metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get() 

327 self.assertEqual(metric, metricOut) 

328 # Test getDeferred with a ref 

329 metricOut = butler.getDeferred(ref).get() 

330 self.assertEqual(metric, metricOut) 

331 

332 # Check we can get components 

333 if storageClass.isComposite(): 

334 self.assertGetComponents( 

335 butler, ref, ("summary", "data", "output"), metric, collections=this_run 

336 ) 

337 

338 primary_uri, secondary_uris = butler.getURIs(ref) 

339 n_uris = len(secondary_uris) 

340 if primary_uri: 

341 n_uris += 1 

342 

343 # Can the artifacts themselves be retrieved? 

344 if not butler._datastore.isEphemeral: 

345 # Create a temporary directory to hold the retrieved 

346 # artifacts. 

347 with tempfile.TemporaryDirectory( 

348 prefix="butler-artifacts-", ignore_cleanup_errors=True 

349 ) as artifact_root: 

350 root_uri = ResourcePath(artifact_root, forceDirectory=True) 

351 

352 for preserve_path in (True, False): 

353 destination = root_uri.join(f"{preserve_path}_{counter}/") 

354 log = logging.getLogger("lsst.x") 

355 log.warning("Using destination %s for args %s", destination, args) 

356 # Use copy so that we can test that overwrite 

357 # protection works (using "auto" for File URIs 

358 # would use hard links and subsequent transfer 

359 # would work because it knows they are the same 

360 # file). 

361 transferred = butler.retrieveArtifacts( 

362 [ref], destination, preserve_path=preserve_path, transfer="copy" 

363 ) 

364 self.assertGreater(len(transferred), 0) 

365 artifacts = list(ResourcePath.findFileResources([destination])) 

366 self.assertEqual(set(transferred), set(artifacts)) 

367 

368 for artifact in transferred: 

369 path_in_destination = artifact.relative_to(destination) 

370 self.assertIsNotNone(path_in_destination) 

371 assert path_in_destination is not None 

372 

373 # When path is not preserved there should not 

374 # be any path separators. 

375 num_seps = path_in_destination.count("/") 

376 if preserve_path: 

377 self.assertGreater(num_seps, 0) 

378 else: 

379 self.assertEqual(num_seps, 0) 

380 

381 self.assertEqual( 

382 len(artifacts), 

383 n_uris, 

384 "Comparing expected artifacts vs actual:" 

385 f" {artifacts} vs {primary_uri} and {secondary_uris}", 

386 ) 

387 

388 if preserve_path: 

389 # No need to run these twice 

390 with self.assertRaises(ValueError): 

391 butler.retrieveArtifacts([ref], destination, transfer="move") 

392 

393 with self.assertRaisesRegex( 

394 ValueError, "^Destination location must refer to a directory" 

395 ): 

396 butler.retrieveArtifacts( 

397 [ref], ResourcePath("/some/file.txt", forceDirectory=False) 

398 ) 

399 

400 with self.assertRaises(FileExistsError): 

401 butler.retrieveArtifacts([ref], destination) 

402 

403 transferred_again = butler.retrieveArtifacts( 

404 [ref], destination, preserve_path=preserve_path, overwrite=True 

405 ) 

406 self.assertEqual(set(transferred_again), set(transferred)) 

407 

408 # Now remove the dataset completely. 

409 butler.pruneDatasets([ref], purge=True, unstore=True) 

410 # Lookup with original args should still fail. 

411 kwargs = {"collections": this_run} 

412 if isinstance(args[0], DatasetRef): 

413 kwargs = {} # Prevent warning from being issued. 

414 self.assertFalse(butler.exists(*args, **kwargs)) 

415 # get() should still fail. 

416 with self.assertRaises(FileNotFoundError): 

417 butler.get(ref) 

418 # Registry shouldn't be able to find it by dataset_id anymore. 

419 self.assertIsNone(butler.get_dataset(ref.id)) 

420 

421 # Do explicit registry removal since we know they are 

422 # empty 

423 butler.registry.removeCollection(this_run) 

424 expected_collections.remove(this_run) 

425 

426 # Create DatasetRef for put using default run. 

427 refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run) 

428 

429 # Check that getDeferred fails with standalone ref. 

430 with self.assertRaises(LookupError): 

431 butler.getDeferred(refIn) 

432 

433 # Put the dataset again, since the last thing we did was remove it 

434 # and we want to use the default collection. 

435 ref = butler.put(metric, refIn) 

436 

437 # Get with parameters 

438 stop = 4 

439 sliced = butler.get(ref, parameters={"slice": slice(stop)}) 

440 self.assertNotEqual(metric, sliced) 

441 self.assertEqual(metric.summary, sliced.summary) 

442 self.assertEqual(metric.output, sliced.output) 

443 assert metric.data is not None # for mypy 

444 self.assertEqual(metric.data[:stop], sliced.data) 

445 # getDeferred with parameters 

446 sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get() 

447 self.assertNotEqual(metric, sliced) 

448 self.assertEqual(metric.summary, sliced.summary) 

449 self.assertEqual(metric.output, sliced.output) 

450 self.assertEqual(metric.data[:stop], sliced.data) 

451 # getDeferred with deferred parameters 

452 sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)}) 

453 self.assertNotEqual(metric, sliced) 

454 self.assertEqual(metric.summary, sliced.summary) 

455 self.assertEqual(metric.output, sliced.output) 

456 self.assertEqual(metric.data[:stop], sliced.data) 

457 

458 if storageClass.isComposite(): 

459 # Check that components can be retrieved 

460 metricOut = butler.get(ref.datasetType.name, dataId) 

461 compNameS = ref.datasetType.componentTypeName("summary") 

462 compNameD = ref.datasetType.componentTypeName("data") 

463 summary = butler.get(compNameS, dataId) 

464 self.assertEqual(summary, metric.summary) 

465 data = butler.get(compNameD, dataId) 

466 self.assertEqual(data, metric.data) 

467 

468 if "counter" in storageClass.derivedComponents: 

469 count = butler.get(ref.datasetType.componentTypeName("counter"), dataId) 

470 self.assertEqual(count, len(data)) 

471 

472 count = butler.get( 

473 ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)} 

474 ) 

475 self.assertEqual(count, stop) 

476 

477 compRef = butler.find_dataset(compNameS, dataId, collections=butler.collections) 

478 assert compRef is not None 

479 summary = butler.get(compRef) 

480 self.assertEqual(summary, metric.summary) 

481 

482 # Create a Dataset type that has the same name but is inconsistent. 

483 inconsistentDatasetType = DatasetType( 

484 datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config") 

485 ) 

486 

487 # Getting with a dataset type that does not match registry fails 

488 with self.assertRaisesRegex( 

489 ValueError, 

490 "(Supplied dataset type .* inconsistent with registry)" 

491 "|(The new storage class .* is not compatible with the existing storage class)", 

492 ): 

493 butler.get(inconsistentDatasetType, dataId) 

494 

495 # Combining a DatasetRef with a dataId should fail 

496 with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"): 

497 butler.get(ref, dataId) 

498 # Getting with an explicit ref should fail if the id doesn't match. 

499 with self.assertRaises(FileNotFoundError): 

500 butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run)) 

501 

502 # Getting a dataset with unknown parameters should fail 

503 with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"): 

504 butler.get(ref, parameters={"unsupported": True}) 

505 

506 # Check we have a collection 

507 collections = set(butler.registry.queryCollections()) 

508 self.assertEqual(collections, expected_collections) 

509 

510 # Clean up to check that we can remove something that may have 

511 # already had a component removed 

512 butler.pruneDatasets([ref], unstore=True, purge=True) 

513 

514 # Add the same ref again, so we can check that duplicate put fails. 

515 ref = butler.put(metric, datasetType, dataId) 

516 

517 # Repeat put will fail. 

518 with self.assertRaisesRegex( 

519 ConflictingDefinitionError, "A database constraint failure was triggered" 

520 ): 

521 butler.put(metric, datasetType, dataId) 

522 

523 # Remove the datastore entry. 

524 butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False) 

525 

526 # Put will still fail 

527 with self.assertRaisesRegex( 

528 ConflictingDefinitionError, "A database constraint failure was triggered" 

529 ): 

530 butler.put(metric, datasetType, dataId) 

531 

532 # Repeat the same sequence with resolved ref. 

533 butler.pruneDatasets([ref], unstore=True, purge=True) 

534 ref = butler.put(metric, refIn) 

535 

536 # Repeat put will fail. 

537 with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"): 

538 butler.put(metric, refIn) 

539 

540 # Remove the datastore entry. 

541 butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False) 

542 

543 # In case of resolved ref this write will succeed. 

544 ref = butler.put(metric, refIn) 

545 

546 # Leave the dataset in place since some downstream tests require 

547 # something to be present 

548 

549 return butler 

550 
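    # The method above exercises many variations; the following minimal
    # sketch (added for exposition, not called by the suite) shows the core
    # round trip it builds on, assuming create_butler() registers
    # "test_metric" and sets a default run.
    def _example_put_get_roundtrip(self) -> None:
        butler, datasetType = self.create_butler(
            self.default_run, "StructuredDataNoComponents", "test_metric"
        )
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        ref = butler.put(metric, datasetType, dataId)
        self.assertEqual(butler.get(ref), metric)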

551 def testDeferredCollectionPassing(self) -> None: 

552 # Construct a butler with no run or collection, but make it writeable. 

553 butler = self.create_empty_butler(writeable=True) 

554 # Create and register a DatasetType 

555 dimensions = butler.dimensions.conform(["instrument", "visit"]) 

556 datasetType = self.addDatasetType( 

557 "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry 

558 ) 

559 # Add needed Dimensions 

560 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

561 butler.registry.insertDimensionData( 

562 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

563 ) 

564 butler.registry.insertDimensionData("day_obs", {"instrument": "DummyCamComp", "id": 20250101}) 

565 butler.registry.insertDimensionData( 

566 "visit", 

567 { 

568 "instrument": "DummyCamComp", 

569 "id": 423, 

570 "name": "fourtwentythree", 

571 "physical_filter": "d-r", 

572 "day_obs": 20250101, 

573 }, 

574 ) 

575 dataId = {"instrument": "DummyCamComp", "visit": 423} 

576 # Create dataset. 

577 metric = makeExampleMetrics() 

578 # Register a new run and put dataset. 

579 run = "deferred" 

580 self.assertTrue(butler.registry.registerRun(run)) 

581 # Second time it will be allowed but indicate no-op 

582 self.assertFalse(butler.registry.registerRun(run)) 

583 ref = butler.put(metric, datasetType, dataId, run=run) 

584 # Putting with no run should fail with CollectionError.

585 with self.assertRaises(CollectionError): 

586 butler.put(metric, datasetType, dataId) 

587 # Dataset should exist. 

588 self.assertTrue(butler.exists(datasetType, dataId, collections=[run])) 

589 # We should be able to get the dataset back, both with and without

590 # a deferred dataset handle. 

591 self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run])) 

592 self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get()) 

593 # Trying to find the dataset without any collection is an error. 

594 with self.assertRaises(NoDefaultCollectionError): 

595 butler.exists(datasetType, dataId) 

596 with self.assertRaises(CollectionError): 

597 butler.get(datasetType, dataId) 

598 # Associate the dataset with a different collection. 

599 butler.registry.registerCollection("tagged") 

600 butler.registry.associate("tagged", [ref]) 

601 # Deleting the dataset from the new collection should leave it findable

602 # in the original collection.

603 butler.pruneDatasets([ref], tags=["tagged"]) 

604 self.assertTrue(butler.exists(datasetType, dataId, collections=[run])) 

605 
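    # Collection semantics exercised above, in brief: a RUN collection owns
    # its datasets (puts go there), while a TAGGED collection only
    # associates existing datasets, so pruning the tag via
    # pruneDatasets(..., tags=["tagged"]) never removes the dataset from
    # its run.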

606 

607class ButlerTests(ButlerPutGetTests): 

608 """Tests for Butler.""" 

609 

610 useTempRoot = True 

611 validationCanFail: bool 

612 fullConfigKey: str | None 

613 registryStr: str | None 

614 datastoreName: list[str] | None 

615 datastoreStr: list[str] 

616 predictionSupported = True 

617 """Does getURIs support 'prediction mode'?""" 

618 

619 def setUp(self) -> None: 

620 """Create a new butler root for each test.""" 

621 self.root = makeTestTempDir(TESTDIR) 

622 Butler.makeRepo(self.root, config=Config(self.configFile)) 

623 self.tmpConfigFile = os.path.join(self.root, "butler.yaml") 

624 

625 def are_uris_equivalent(self, uri1: ResourcePath, uri2: ResourcePath) -> bool: 

626 """Return True if two URIs refer to the same resource. 

627 

628 Subclasses may override to handle unique requirements. 

629 """ 

630 return uri1 == uri2 

631 

632 def testConstructor(self) -> None: 

633 """Independent test of constructor.""" 

634 butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) 

635 self.assertIsInstance(butler, Butler) 

636 

637 # Check that butler.yaml is added automatically. 

638 if self.tmpConfigFile.endswith(end := "/butler.yaml"): 

639 config_dir = self.tmpConfigFile[: -len(end)] 

640 butler = Butler.from_config(config_dir, run=self.default_run) 

641 self.assertIsInstance(butler, Butler) 

642 

643 # Even with a ResourcePath. 

644 butler = Butler.from_config(ResourcePath(config_dir, forceDirectory=True), run=self.default_run) 

645 self.assertIsInstance(butler, Butler) 

646 

647 collections = set(butler.registry.queryCollections()) 

648 self.assertEqual(collections, {self.default_run}) 

649 

650 # Check that some special characters can be included in run name. 

651 special_run = "u@b.c-A" 

652 butler_special = Butler.from_config(butler=butler, run=special_run) 

653 collections = set(butler_special.registry.queryCollections("*@*")) 

654 self.assertEqual(collections, {special_run}) 

655 

656 butler2 = Butler.from_config(butler=butler, collections=["other"]) 

657 self.assertEqual(butler2.collections, ("other",)) 

658 self.assertIsNone(butler2.run) 

659 self.assertEqual(type(butler._datastore), type(butler2._datastore)) 

660 self.assertEqual(butler._datastore.config, butler2._datastore.config) 

661 

662 # Test that we can use an environment variable to find this 

663 # repository. 

664 butler_index = Config() 

665 butler_index["label"] = self.tmpConfigFile 

666 for suffix in (".yaml", ".json"): 

667 # Ensure that the content differs so that we know that 

668 # we aren't reusing the cache. 

669 bad_label = f"file://bucket/not_real{suffix}" 

670 butler_index["bad_label"] = bad_label 

671 with ResourcePath.temporary_uri(suffix=suffix) as temp_file: 

672 butler_index.dumpToUri(temp_file) 

673 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}): 

674 self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"}) 

675 uri = Butler.get_repo_uri("bad_label") 

676 self.assertEqual(uri, ResourcePath(bad_label)) 

677 uri = Butler.get_repo_uri("label") 

678 butler = Butler.from_config(uri, writeable=False) 

679 self.assertIsInstance(butler, Butler) 

680 butler = Butler.from_config("label", writeable=False) 

681 self.assertIsInstance(butler, Butler) 

682 with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"): 

683 Butler.from_config("not_there", writeable=False) 

684 with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"): 

685 Butler.from_config("bad_label") 

686 with self.assertRaises(FileNotFoundError): 

687 # Should ignore aliases. 

688 Butler.from_config(ResourcePath("label", forceAbsolute=False)) 

689 with self.assertRaises(KeyError) as cm: 

690 Butler.get_repo_uri("missing") 

691 self.assertEqual( 

692 Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False) 

693 ) 

694 self.assertIn("not known to", str(cm.exception)) 

695 # Should report no failure. 

696 self.assertEqual(ButlerRepoIndex.get_failure_reason(), "") 

697 with ResourcePath.temporary_uri(suffix=suffix) as temp_file: 

698 # Now with empty configuration. 

699 butler_index = Config() 

700 butler_index.dumpToUri(temp_file) 

701 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}): 

702 with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"): 

703 Butler.from_config("label") 

704 with ResourcePath.temporary_uri(suffix=suffix) as temp_file: 

705 # Now with bad contents. 

706 with open(temp_file.ospath, "w") as fh: 

707 print("'", file=fh) 

708 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}): 

709 with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"): 

710 Butler.from_config("label") 

711 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}): 

712 with self.assertRaises(FileNotFoundError): 

713 Butler.get_repo_uri("label") 

714 self.assertEqual(Butler.get_known_repos(), set()) 

715 

716 with self.assertRaisesRegex(FileNotFoundError, "index file not found"): 

717 Butler.from_config("label") 

718 

719 # Check that we can create Butler when the alias file is not found. 

720 butler = Butler.from_config(self.tmpConfigFile, writeable=False) 

721 self.assertIsInstance(butler, Butler) 

722 with self.assertRaises(RuntimeError) as cm: 

723 # No environment variable set. 

724 Butler.get_repo_uri("label") 

725 self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False)) 

726 self.assertIn("No repository index defined", str(cm.exception)) 

727 with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"): 

728 # No aliases registered. 

729 Butler.from_config("not_there") 

730 self.assertEqual(Butler.get_known_repos(), set()) 

731 

732 def testDafButlerRepositories(self): 

733 with unittest.mock.patch.dict( 

734 os.environ, 

735 {"DAF_BUTLER_REPOSITORIES": "label: 'https://someuri.com'\notherLabel: 'https://otheruri.com'\n"}, 

736 ): 

737 self.assertEqual(str(Butler.get_repo_uri("label")), "https://someuri.com") 

738 

739 with unittest.mock.patch.dict( 

740 os.environ, 

741 { 

742 "DAF_BUTLER_REPOSITORIES": "label: https://someuri.com", 

743 "DAF_BUTLER_REPOSITORY_INDEX": "https://someuri.com", 

744 }, 

745 ): 

746 with self.assertRaisesRegex(RuntimeError, "Only one of the environment variables"): 

747 Butler.get_repo_uri("label") 

748 

749 with unittest.mock.patch.dict( 

750 os.environ, 

751 {"DAF_BUTLER_REPOSITORIES": "invalid"}, 

752 ): 

753 with self.assertRaisesRegex(ValueError, "Repository index not in expected format"): 

754 Butler.get_repo_uri("label") 

755 
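    # The index format consumed above, for reference: DAF_BUTLER_REPOSITORIES
    # holds inline YAML, while DAF_BUTLER_REPOSITORY_INDEX points at a file,
    # both mapping labels to repository URIs, e.g.
    #
    #   label: https://someuri.com
    #   otherLabel: https://otheruri.com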

756 def testBasicPutGet(self) -> None: 

757 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

758 self.runPutGetTest(storageClass, "test_metric") 

759 

760 def testCompositePutGetConcrete(self) -> None: 

761 storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly") 

762 butler = self.runPutGetTest(storageClass, "test_metric") 

763 

764 # Should *not* be disassembled 

765 datasets = list(butler.registry.queryDatasets(..., collections=self.default_run)) 

766 self.assertEqual(len(datasets), 1) 

767 uri, components = butler.getURIs(datasets[0]) 

768 self.assertIsInstance(uri, ResourcePath) 

769 self.assertFalse(components) 

770 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}") 

771 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}") 

772 

773 # Predicted dataset 

774 if self.predictionSupported: 

775 dataId = {"instrument": "DummyCamComp", "visit": 424} 

776 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True) 

777 self.assertFalse(components) 

778 self.assertIsInstance(uri, ResourcePath) 

779 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}") 

780 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}") 

781 

782 def testCompositePutGetVirtual(self) -> None: 

783 storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp") 

784 butler = self.runPutGetTest(storageClass, "test_metric_comp") 

785 

786 # Should be disassembled 

787 datasets = list(butler.registry.queryDatasets(..., collections=self.default_run)) 

788 self.assertEqual(len(datasets), 1) 

789 uri, components = butler.getURIs(datasets[0]) 

790 

791 if butler._datastore.isEphemeral: 

792 # Never disassemble in-memory datastore 

793 self.assertIsInstance(uri, ResourcePath) 

794 self.assertFalse(components) 

795 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}") 

796 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}") 

797 else: 

798 self.assertIsNone(uri) 

799 self.assertEqual(set(components), set(storageClass.components)) 

800 for compuri in components.values(): 

801 self.assertIsInstance(compuri, ResourcePath) 

802 self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}") 

803 self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}") 

804 

805 if self.predictionSupported: 

806 # Predicted dataset 

807 dataId = {"instrument": "DummyCamComp", "visit": 424} 

808 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True) 

809 

810 if butler._datastore.isEphemeral: 

811 # Never disassembled 

812 self.assertIsInstance(uri, ResourcePath) 

813 self.assertFalse(components) 

814 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}") 

815 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}") 

816 else: 

817 self.assertIsNone(uri) 

818 self.assertEqual(set(components), set(storageClass.components)) 

819 for compuri in components.values(): 

820 self.assertIsInstance(compuri, ResourcePath) 

821 self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}") 

822 self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}") 

823 

824 def testStorageClassOverrideGet(self) -> None: 

825 """Test storage class conversion on get with override.""" 

826 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

827 datasetTypeName = "anything" 

828 run = self.default_run 

829 

830 butler, datasetType = self.create_butler(run, storageClass, datasetTypeName) 

831 

832 # Create and store a dataset. 

833 metric = makeExampleMetrics() 

834 dataId = {"instrument": "DummyCamComp", "visit": 423} 

835 

836 ref = butler.put(metric, datasetType, dataId) 

837 

838 # Return native type. 

839 retrieved = butler.get(ref) 

840 self.assertEqual(retrieved, metric) 

841 

842 # Specify an override. 

843 new_sc = self.storageClassFactory.getStorageClass("MetricsConversion") 

844 model = butler.get(ref, storageClass=new_sc) 

845 self.assertNotEqual(type(model), type(retrieved)) 

846 self.assertIs(type(model), new_sc.pytype) 

847 self.assertEqual(retrieved, model) 

848 

849 # Defer but override later. 

850 deferred = butler.getDeferred(ref) 

851 model = deferred.get(storageClass=new_sc) 

852 self.assertIs(type(model), new_sc.pytype) 

853 self.assertEqual(retrieved, model) 

854 

855 # Defer but override up front. 

856 deferred = butler.getDeferred(ref, storageClass=new_sc) 

857 model = deferred.get() 

858 self.assertIs(type(model), new_sc.pytype) 

859 self.assertEqual(retrieved, model) 

860 

861 # Retrieve a component. Should be a tuple. 

862 data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple") 

863 self.assertIs(type(data), tuple) 

864 self.assertEqual(data, tuple(retrieved.data)) 

865 

866 # Parameter on the write storage class should work regardless 

867 # of read storage class. 

868 data = butler.get( 

869 "anything.data", 

870 dataId, 

871 storageClass="StructuredDataDataTestTuple", 

872 parameters={"slice": slice(2, 4)}, 

873 ) 

874 self.assertEqual(len(data), 2) 

875 

876 # Try a parameter that is known to the read storage class but not 

877 # the write storage class. 

878 with self.assertRaises(KeyError): 

879 butler.get( 

880 "anything.data", 

881 dataId, 

882 storageClass="StructuredDataDataTestTuple", 

883 parameters={"xslice": slice(2, 4)}, 

884 ) 

885 
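    # Why the parameter behaviour above holds: the artifact is always read
    # with its write-time storage class, whose parameters (slice) are
    # applied first; only then is the object converted to the requested
    # read storage class, so read-only parameters (xslice) raise KeyError.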

886 def testPytypePutCoercion(self) -> None: 

887 """Test python type coercion on Butler.get and put.""" 

888 # Store some data with the normal example storage class. 

889 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

890 datasetTypeName = "test_metric" 

891 butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName) 

892 

893 dataId = {"instrument": "DummyCamComp", "visit": 423} 

894 

895 # Put a dict and this should coerce to a MetricsExample 

896 test_dict = {"summary": {"a": 1}, "output": {"b": 2}} 

897 metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424) 

898 test_metric = butler.get(metric_ref) 

899 self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample") 

900 self.assertEqual(test_metric.summary, test_dict["summary"]) 

901 self.assertEqual(test_metric.output, test_dict["output"]) 

902 

903 # Check that the put still works if a DatasetType is given with 

904 # a definition matching this python type. 

905 registry_type = butler.get_dataset_type(datasetTypeName) 

906 this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson") 

907 metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425) 

908 self.assertEqual(metric2_ref.datasetType, registry_type) 

909 

910 # The get will return the type expected by registry. 

911 test_metric2 = butler.get(metric2_ref) 

912 self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample") 

913 

914 # Make a new DatasetRef with the compatible but different DatasetType. 

915 # This should now return a dict. 

916 new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run) 

917 test_dict2 = butler.get(new_ref) 

918 self.assertEqual(get_full_type_name(test_dict2), "dict") 

919 

920 # Get it again with the wrong dataset type definition using get()

921 # with a dataId rather than a ref. This should be consistent with the

922 # get(ref) behavior above and return the type of the DatasetType.

923 test_dict3 = butler.get(this_type, dataId=dataId, visit=425) 

924 self.assertEqual(get_full_type_name(test_dict3), "dict") 

925 

926 def testIngest(self) -> None: 

927 butler = self.create_empty_butler(run=self.default_run) 

928 

929 # Create and register a DatasetType 

930 dimensions = butler.dimensions.conform(["instrument", "visit", "detector"]) 

931 

932 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml") 

933 datasetTypeName = "metric" 

934 

935 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

936 

937 # Add needed Dimensions 

938 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

939 butler.registry.insertDimensionData( 

940 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

941 ) 

942 butler.registry.insertDimensionData("day_obs", {"instrument": "DummyCamComp", "id": 20250101}) 

943 for detector in (1, 2): 

944 butler.registry.insertDimensionData( 

945 "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"} 

946 ) 

947 

948 butler.registry.insertDimensionData( 

949 "visit", 

950 { 

951 "instrument": "DummyCamComp", 

952 "id": 423, 

953 "name": "fourtwentythree", 

954 "physical_filter": "d-r", 

955 "day_obs": 20250101, 

956 }, 

957 { 

958 "instrument": "DummyCamComp", 

959 "id": 424, 

960 "name": "fourtwentyfour", 

961 "physical_filter": "d-r", 

962 "day_obs": 20250101, 

963 }, 

964 ) 

965 

966 formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter") 

967 dataRoot = os.path.join(TESTDIR, "data", "basic") 

968 datasets = [] 

969 for detector in (1, 2): 

970 detector_name = f"detector_{detector}" 

971 metricFile = os.path.join(dataRoot, f"{detector_name}.yaml") 

972 dataId = butler.registry.expandDataId( 

973 {"instrument": "DummyCamComp", "visit": 423, "detector": detector} 

974 ) 

975 # Create a DatasetRef for ingest 

976 refIn = DatasetRef(datasetType, dataId, run=self.default_run) 

977 

978 datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter)) 

979 

980 butler.ingest(*datasets, transfer="copy") 

981 

982 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423} 

983 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423} 

984 

985 metrics1 = butler.get(datasetTypeName, dataId1) 

986 metrics2 = butler.get(datasetTypeName, dataId2) 

987 self.assertNotEqual(metrics1, metrics2) 

988 

989 # Compare URIs 

990 uri1 = butler.getURI(datasetTypeName, dataId1) 

991 uri2 = butler.getURI(datasetTypeName, dataId2) 

992 self.assertFalse(self.are_uris_equivalent(uri1, uri2), f"Cf. {uri1} with {uri2}") 

993 

994 # Now do a multi-dataset but single file ingest 

995 metricFile = os.path.join(dataRoot, "detectors.yaml") 

996 refs = [] 

997 for detector in (1, 2): 

998 detector_name = f"detector_{detector}" 

999 dataId = butler.registry.expandDataId( 

1000 {"instrument": "DummyCamComp", "visit": 424, "detector": detector} 

1001 ) 

1002 # Create a DatasetRef for ingest 

1003 refs.append(DatasetRef(datasetType, dataId, run=self.default_run)) 

1004 

1005 # Test "move" transfer to ensure that the files themselves 

1006 # have disappeared following ingest. 

1007 with ResourcePath.temporary_uri(suffix=".yaml") as tempFile: 

1008 tempFile.transfer_from(ResourcePath(metricFile), transfer="copy") 

1009 

1010 datasets = [] 

1011 datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter)) 

1012 

1013 # For first ingest use copy. 

1014 butler.ingest(*datasets, transfer="copy", record_validation_info=False) 

1015 

1016 # Now try to ingest again in "execution butler" mode where 

1017 # the registry entries exist but the datastore does not have 

1018 # the files. We also need to strip the dimension records to ensure 

1019 # that they will be re-added by the ingest. 

1020 ref = datasets[0].refs[0] 

1021 datasets[0].refs = [ 

1022 cast( 

1023 DatasetRef, 

1024 butler.find_dataset(ref.datasetType, data_id=ref.dataId, collections=ref.run), 

1025 ) 

1026 for ref in datasets[0].refs 

1027 ] 

1028 all_refs = [] 

1029 for dataset in datasets: 

1030 refs = [] 

1031 for ref in dataset.refs: 

1032 # Create a dict from the dataId to drop the records. 

1033 new_data_id = dict(ref.dataId.required) 

1034 new_ref = butler.find_dataset(ref.datasetType, new_data_id, collections=ref.run) 

1035 assert new_ref is not None 

1036 self.assertFalse(new_ref.dataId.hasRecords()) 

1037 refs.append(new_ref) 

1038 dataset.refs = refs 

1039 all_refs.extend(dataset.refs) 

1040 butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False) 

1041 

1042 # Use move mode to test that the file is deleted. Also 

1043 # disable recording of file size. 

1044 butler.ingest(*datasets, transfer="move", record_validation_info=False) 

1045 

1046 # Check that every ref now has records. 

1047 for dataset in datasets: 

1048 for ref in dataset.refs: 

1049 self.assertTrue(ref.dataId.hasRecords()) 

1050 

1051 # Ensure that the file has disappeared. 

1052 self.assertFalse(tempFile.exists()) 

1053 

1054 # Check that the datastore recorded no file size. 

1055 # Not all datastores can support this. 

1056 try: 

1057 infos = butler._datastore.getStoredItemsInfo(datasets[0].refs[0]) # type: ignore[attr-defined] 

1058 self.assertEqual(infos[0].file_size, -1) 

1059 except AttributeError: 

1060 pass 

1061 

1062 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424} 

1063 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424} 

1064 

1065 multi1 = butler.get(datasetTypeName, dataId1) 

1066 multi2 = butler.get(datasetTypeName, dataId2) 

1067 

1068 self.assertEqual(multi1, metrics1) 

1069 self.assertEqual(multi2, metrics2) 

1070 

1071 # Compare URIs 

1072 uri1 = butler.getURI(datasetTypeName, dataId1) 

1073 uri2 = butler.getURI(datasetTypeName, dataId2) 

1074 self.assertTrue(self.are_uris_equivalent(uri1, uri2), f"Cf. {uri1} with {uri2}") 

1075 

1076 # Test that removing one does not break the second 

1077 # This line will issue a warning log message for a ChainedDatastore 

1078 # that uses an InMemoryDatastore, since in-memory datastores cannot ingest

1079 # files. 

1080 butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False) 

1081 self.assertFalse(butler.exists(datasetTypeName, dataId1)) 

1082 self.assertTrue(butler.exists(datasetTypeName, dataId2)) 

1083 multi2b = butler.get(datasetTypeName, dataId2) 

1084 self.assertEqual(multi2, multi2b) 

1085 

1086 # Ensure we can ingest 0 datasets 

1087 datasets = [] 

1088 butler.ingest(*datasets) 

1089 
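    # A minimal sketch of the ingest pattern above (added for exposition,
    # not called by the suite); "metric.yaml" is a hypothetical path and the
    # DummyCamComp dimension records are assumed to exist already.
    def _example_ingest_sketch(self, butler: Butler, datasetType: DatasetType) -> DatasetRef:
        dataId = butler.registry.expandDataId(
            {"instrument": "DummyCamComp", "visit": 423, "detector": 1}
        )
        ref = DatasetRef(datasetType, dataId, run=self.default_run)
        butler.ingest(FileDataset(path="metric.yaml", refs=[ref]), transfer="copy")
        return ref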

1090 def testPickle(self) -> None: 

1091 """Test pickle support.""" 

1092 butler = self.create_empty_butler(run=self.default_run) 

1093 assert isinstance(butler, DirectButler), "Expect DirectButler in configuration" 

1094 butlerOut = pickle.loads(pickle.dumps(butler)) 

1095 self.assertIsInstance(butlerOut, Butler) 

1096 self.assertEqual(butlerOut._config, butler._config) 

1097 self.assertEqual(butlerOut.collections, butler.collections) 

1098 self.assertEqual(butlerOut.run, butler.run) 

1099 

1100 def testGetDatasetTypes(self) -> None: 

1101 butler = self.create_empty_butler(run=self.default_run) 

1102 dimensions = butler.dimensions.conform(["instrument", "visit", "physical_filter"]) 

1103 dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [ 

1104 ( 

1105 "instrument", 

1106 [ 

1107 {"instrument": "DummyCam"}, 

1108 {"instrument": "DummyHSC"}, 

1109 {"instrument": "DummyCamComp"}, 

1110 ], 

1111 ), 

1112 ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]), 

1113 ("day_obs", [{"instrument": "DummyCam", "id": 20250101}]), 

1114 ( 

1115 "visit", 

1116 [ 

1117 { 

1118 "instrument": "DummyCam", 

1119 "id": 42, 

1120 "name": "fortytwo", 

1121 "physical_filter": "d-r", 

1122 "day_obs": 20250101, 

1123 } 

1124 ], 

1125 ), 

1126 ] 

1127 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1128 # Add needed Dimensions 

1129 for element, data in dimensionEntries: 

1130 butler.registry.insertDimensionData(element, *data) 

1131 

1132 # When a DatasetType is added to the registry entries are not created 

1133 # for components but querying them can return the components. 

1134 datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"} 

1135 components = set() 

1136 for datasetTypeName in datasetTypeNames: 

1137 # Create and register a DatasetType 

1138 self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

1139 

1140 for componentName in storageClass.components: 

1141 components.add(DatasetType.nameWithComponent(datasetTypeName, componentName)) 

1142 

1143 fromRegistry: set[DatasetType] = set() 

1144 for parent_dataset_type in butler.registry.queryDatasetTypes(): 

1145 fromRegistry.add(parent_dataset_type) 

1146 fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes()) 

1147 self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components) 

1148 

1149 # Now that we have some dataset types registered, validate them 

1150 butler.validateConfiguration( 

1151 ignore=[ 

1152 "test_metric_comp", 

1153 "metric3", 

1154 "metric5", 

1155 "calexp", 

1156 "DummySC", 

1157 "datasetType.component", 

1158 "random_data", 

1159 "random_data_2", 

1160 ] 

1161 ) 

1162 

1163 # Add a new datasetType that will fail template validation 

1164 self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry) 

1165 if self.validationCanFail: 

1166 with self.assertRaises(ValidationError): 

1167 butler.validateConfiguration() 

1168 

1169 # Rerun validation but with a subset of dataset type names 

1170 butler.validateConfiguration(datasetTypeNames=["metric4"]) 

1171 

1172 # Rerun validation but ignore the bad datasetType 

1173 butler.validateConfiguration( 

1174 ignore=[ 

1175 "test_metric_comp", 

1176 "metric3", 

1177 "metric5", 

1178 "calexp", 

1179 "DummySC", 

1180 "datasetType.component", 

1181 "random_data", 

1182 "random_data_2", 

1183 ] 

1184 ) 

1185 

1186 def testTransaction(self) -> None: 

1187 butler = self.create_empty_butler(run=self.default_run) 

1188 datasetTypeName = "test_metric" 

1189 dimensions = butler.dimensions.conform(["instrument", "visit"]) 

1190 dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = ( 

1191 ("instrument", {"instrument": "DummyCam"}), 

1192 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}), 

1193 ("day_obs", {"instrument": "DummyCam", "id": 20250101}), 

1194 ( 

1195 "visit", 

1196 { 

1197 "instrument": "DummyCam", 

1198 "id": 42, 

1199 "name": "fortytwo", 

1200 "physical_filter": "d-r", 

1201 "day_obs": 20250101, 

1202 }, 

1203 ), 

1204 ) 

1205 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1206 metric = makeExampleMetrics() 

1207 dataId = {"instrument": "DummyCam", "visit": 42} 

1208 # Create and register a DatasetType 

1209 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

1210 with self.assertRaises(TransactionTestError): 

1211 with butler.transaction(): 

1212 # Add needed Dimensions 

1213 for args in dimensionEntries: 

1214 butler.registry.insertDimensionData(*args) 

1215 # Store a dataset 

1216 ref = butler.put(metric, datasetTypeName, dataId) 

1217 self.assertIsInstance(ref, DatasetRef) 

1218 # Test get of a ref. 

1219 metricOut = butler.get(ref) 

1220 self.assertEqual(metric, metricOut) 

1221 # Test get 

1222 metricOut = butler.get(datasetTypeName, dataId) 

1223 self.assertEqual(metric, metricOut) 

1224 # Check we can get components 

1225 self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric) 

1226 raise TransactionTestError("This should roll back the entire transaction") 

1227 with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"): 

1228 butler.registry.expandDataId(dataId) 

1229 # Should raise LookupError for missing data ID value 

1230 with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"): 

1231 butler.get(datasetTypeName, dataId) 

1232 # Also check explicitly if Dataset entry is missing 

1233 self.assertIsNone(butler.find_dataset(datasetType, dataId, collections=butler.collections)) 

1234 # Direct retrieval should not find the file in the Datastore 

1235 with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"): 

1236 butler.get(ref) 

1237 
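    # The rollback pattern under test, distilled: every operation inside
    # "with butler.transaction():" (dimension inserts, the registry entry,
    # and the datastore write) is undone atomically when the block raises;
    # a dedicated exception type rules out accidentally catching an
    # unrelated error.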

1238 def testMakeRepo(self) -> None: 

1239 """Test that we can write butler configuration to a new repository via 

1240 the Butler.makeRepo interface and then instantiate a butler from the 

1241 repo root. 

1242 """ 

1243 # Do not run the test if we know this datastore configuration does 

1244 # not support a file system root 

1245 if self.fullConfigKey is None: 

1246 return 

1247 

1248 # create two separate directories 

1249 root1 = tempfile.mkdtemp(dir=self.root) 

1250 root2 = tempfile.mkdtemp(dir=self.root) 

1251 

1252 butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile)) 

1253 limited = Config(self.configFile) 

1254 butler1 = Butler.from_config(butlerConfig) 

1255 assert isinstance(butler1, DirectButler), "Expect DirectButler in configuration" 

1256 butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile)) 

1257 full = Config(self.tmpConfigFile) 

1258 butler2 = Butler.from_config(butlerConfig) 

1259 assert isinstance(butler2, DirectButler), "Expect DirectButler in configuration" 

1260 # Butlers should have the same configuration regardless of whether 

1261 # defaults were expanded. 

1262 self.assertEqual(butler1._config, butler2._config) 

1263 # Config files loaded directly should not be the same. 

1264 self.assertNotEqual(limited, full) 

1265 # Make sure "limited" lacks a few keys that we know the full config

1266 # inherits from defaults.

1267 self.assertIn(self.fullConfigKey, full) 

1268 self.assertNotIn(self.fullConfigKey, limited) 

1269 

1270 # Collections don't appear until something is put in them 

1271 collections1 = set(butler1.registry.queryCollections()) 

1272 self.assertEqual(collections1, set()) 

1273 self.assertEqual(set(butler2.registry.queryCollections()), collections1) 

1274 

1275 # Check that a config with no associated file name will not 

1276 # work properly with relocatable Butler repo 

1277 butlerConfig.configFile = None 

1278 with self.assertRaises(ValueError): 

1279 Butler.from_config(butlerConfig) 

1280 

1281 with self.assertRaises(FileExistsError): 

1282 Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False) 

1283 

1284 def testStringification(self) -> None: 

1285 butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) 

1286 butlerStr = str(butler) 

1287 

1288 if self.datastoreStr is not None: 

1289 for testStr in self.datastoreStr: 

1290 self.assertIn(testStr, butlerStr) 

1291 if self.registryStr is not None: 

1292 self.assertIn(self.registryStr, butlerStr) 

1293 

1294 datastoreName = butler._datastore.name 

1295 if self.datastoreName is not None: 

1296 for testStr in self.datastoreName: 

1297 self.assertIn(testStr, datastoreName) 

1298 

1299 def testButlerRewriteDataId(self) -> None: 

1300 """Test that dataIds can be rewritten based on dimension records.""" 

1301 butler = self.create_empty_butler(run=self.default_run) 

1302 

1303 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1304 datasetTypeName = "random_data" 

1305 

1306 # Create dimension records. 

1307 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1308 butler.registry.insertDimensionData( 

1309 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1310 ) 

1311 butler.registry.insertDimensionData( 

1312 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

1313 ) 

1314 

1315 dimensions = butler.dimensions.conform(["instrument", "exposure"]) 

1316 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

1317 butler.registry.registerDatasetType(datasetType) 

1318 

1319 n_exposures = 5 

1320 dayobs = 20210530 

1321 

1322 butler.registry.insertDimensionData("day_obs", {"instrument": "DummyCamComp", "id": dayobs}) 

1323 

1324 for i in range(n_exposures): 

1325 butler.registry.insertDimensionData("group", {"instrument": "DummyCamComp", "name": f"group{i}"}) 

1326 butler.registry.insertDimensionData( 

1327 "exposure", 

1328 { 

1329 "instrument": "DummyCamComp", 

1330 "id": i, 

1331 "obs_id": f"exp{i}", 

1332 "seq_num": i, 

1333 "day_obs": dayobs, 

1334 "physical_filter": "d-r", 

1335 "group": f"group{i}", 

1336 }, 

1337 ) 

1338 

1339 # Write some data. 

1340 for i in range(n_exposures): 

1341 metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]} 

1342 

1343 # Use the seq_num for the put to test rewriting. 

1344 dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

1345 ref = butler.put(metric, datasetTypeName, dataId=dataId) 

1346 

1347 # Check that the exposure is correct in the dataId 

1348 self.assertEqual(ref.dataId["exposure"], i) 

1349 

1350 # and check that we can get the dataset back with the same dataId 

1351 new_metric = butler.get(datasetTypeName, dataId=dataId) 

1352 self.assertEqual(new_metric, metric) 

1353 

1354 # Check that we can find the datasets using the day_obs or the 

1355 # exposure.day_obs. 

1356 datasets_1 = list( 

1357 butler.registry.queryDatasets( 

1358 datasetType, 

1359 collections=self.default_run, 

1360 where="day_obs = dayObs AND instrument = instr", 

1361 bind={"dayObs": dayobs, "instr": "DummyCamComp"}, 

1362 ) 

1363 ) 

1364 datasets_2 = list( 

1365 butler.registry.queryDatasets( 

1366 datasetType, 

1367 collections=self.default_run, 

1368 where="exposure.day_obs = dayObs AND instrument = instr", 

1369 bind={"dayObs": dayobs, "instr": "DummyCamComp"}, 

1370 ) 

1371 ) 

1372 self.assertEqual(datasets_1, datasets_2) 

1373 

1374 def testGetDatasetCollectionCaching(self) -> None: 

1375 # Prior to DM-41117, there was a bug where get_dataset would throw 

1376 # MissingCollectionError if you tried to fetch a dataset that was added 

1377 # after the collection cache was last updated. 
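# (The reader butler is constructed first, below, so that its collection
# cache predates the run created by the writer.)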

1378 reader_butler, datasetType = self.create_butler(self.default_run, "int", "datasettypename") 

1379 writer_butler = self.create_empty_butler(writeable=True, run="new_run") 

1380 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1381 put_ref = writer_butler.put(123, datasetType, dataId) 

1382 get_ref = reader_butler.get_dataset(put_ref.id) 

1383 self.assertEqual(get_ref.id, put_ref.id) 

1384 

1385 

1386class FileDatastoreButlerTests(ButlerTests): 

1387 """Common tests and specialization of ButlerTests for butlers backed 

1388 by datastores that inherit from FileDatastore. 

1389 """ 

1390 

1391 trustModeSupported = True 

1392 

1393 def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool: 

1394 """Check if file exists at a given path (relative to root). 

1395 

1396 The testPutTemplates test verifies the actual physical existence of 

1397 the files in the requested location. 

1398 """ 

1399 uri = ResourcePath(root, forceDirectory=True) 

1400 return uri.join(relpath).exists() 

1401 

1402 def testPutTemplates(self) -> None: 

1403 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1404 butler = self.create_empty_butler(run=self.default_run) 

1405 

1406 # Add needed Dimensions 

1407 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1408 butler.registry.insertDimensionData( 

1409 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1410 ) 

1411 butler.registry.insertDimensionData("day_obs", {"instrument": "DummyCamComp", "id": 20250101}) 

1412 butler.registry.insertDimensionData( 

1413 "visit", 

1414 { 

1415 "instrument": "DummyCamComp", 

1416 "id": 423, 

1417 "name": "v423", 

1418 "physical_filter": "d-r", 

1419 "day_obs": 20250101, 

1420 }, 

1421 ) 

1422 butler.registry.insertDimensionData( 

1423 "visit", 

1424 { 

1425 "instrument": "DummyCamComp", 

1426 "id": 425, 

1427 "name": "v425", 

1428 "physical_filter": "d-r", 

1429 "day_obs": 20250101, 

1430 }, 

1431 ) 

1432 

1433 # Create and store a dataset 

1434 metric = makeExampleMetrics() 

1435 

1436 # Create two almost-identical DatasetTypes (both will use default 

1437 # template) 

1438 dimensions = butler.dimensions.conform(["instrument", "visit"]) 

1439 butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass)) 

1440 butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass)) 

1441 butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass)) 

1442 

1443 dataId1 = {"instrument": "DummyCamComp", "visit": 423} 

1444 dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"} 

1445 

1446 # Put with exactly the data ID keys needed 

1447 ref = butler.put(metric, "metric1", dataId1) 

1448 uri = butler.getURI(ref) 

1449 self.assertTrue(uri.exists()) 

1450 self.assertTrue( 

1451 uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle") 

1452 ) 

1453 

1454 # Check the template based on dimensions 

1455 if hasattr(butler._datastore, "templates"): 

1456 butler._datastore.templates.validateTemplates([ref]) 

1457 

1458 # Put with extra data ID keys (physical_filter is an optional 

1459 # dependency); should not change template (at least the way we're 

1460 # defining them to behave now; the important thing is that they 

1461 # must be consistent). 

1462 ref = butler.put(metric, "metric2", dataId2) 

1463 uri = butler.getURI(ref) 

1464 self.assertTrue(uri.exists()) 

1465 self.assertTrue( 

1466 uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle") 

1467 ) 

1468 

1469 # Check the template based on dimensions 

1470 if hasattr(butler._datastore, "templates"): 

1471 butler._datastore.templates.validateTemplates([ref]) 

1472 

1473 # Use a template that has a typo in dimension record metadata. 

1474 # Easier to test with a butler that has a ref with records attached. 

1475 template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits") 
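# The ":?" suffix marks the field as optional, so a missing record
# attribute should be logged rather than raised -- which is what the
# next two blocks check.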

1476 with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"): 

1477 path = template.format(ref) 

1478 self.assertEqual(path, f"a/v423/{ref.id}_fits") 

1479 

1480 template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits") 

1481 with self.assertRaises(KeyError): 

1482 with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"): 

1483 template.format(ref) 

1484 

1485 # Now use a file template that will not result in unique filenames 

1486 with self.assertRaises(FileTemplateValidationError): 

1487 butler.put(metric, "metric3", dataId1) 

1488 

1489 def testImportExport(self) -> None: 

1490 # Run put/get tests just to create and populate a repo. 

1491 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1492 self.runImportExportTest(storageClass) 

1493 

1494 @unittest.expectedFailure 

1495 def testImportExportVirtualComposite(self) -> None: 

1496 # Run put/get tests just to create and populate a repo. 

1497 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

1498 self.runImportExportTest(storageClass) 

1499 

1500 def runImportExportTest(self, storageClass: StorageClass) -> None: 

1501 """Test exporting and importing. 

1502 

1503 This test does an export to a temp directory and an import back 

1504 into a new temp directory repo. It does not assume a posix datastore. 

1505 """ 

1506 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1507 

1508 # Test that we must have a file extension. 

1509 with self.assertRaises(ValueError): 

1510 with exportButler.export(filename="dump", directory=".") as export: 

1511 pass 

1512 

1513 # Test that unknown format is not allowed. 

1514 with self.assertRaises(ValueError): 

1515 with exportButler.export(filename="dump.fits", directory=".") as export: 

1516 pass 

1517 

1518 # Test that the repo actually has at least one dataset. 

1519 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1520 self.assertGreater(len(datasets), 0) 

1521 # Add a DimensionRecord that's unused by those datasets. 

1522 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")} 

1523 exportButler.registry.insertDimensionData("skymap", skymapRecord) 

1524 # Export and then import datasets. 

1525 with safeTestTempDir(TESTDIR) as exportDir: 

1526 exportFile = os.path.join(exportDir, "exports.yaml") 

1527 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export: 

1528 export.saveDatasets(datasets) 

1529 # Export the same datasets again. This should quietly do 

1530 # nothing because of internal deduplication, and it shouldn't 

1531 # complain about being asked to export the "htm7" elements even 

1532 # though there aren't any in these datasets or in the database. 

1533 export.saveDatasets(datasets, elements=["htm7"]) 

1534 # Save one of the data IDs again; this should be harmless 

1535 # because of internal deduplication. 

1536 export.saveDataIds([datasets[0].dataId]) 

1537 # Save some dimension records directly. 

1538 export.saveDimensionData("skymap", [skymapRecord]) 

1539 self.assertTrue(os.path.exists(exportFile)) 

1540 with safeTestTempDir(TESTDIR) as importDir: 

1541 # We always want this to be a local posix butler 

1542 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml"))) 

1543 # Calling script.butlerImport tests the implementation of the 

1544 # butler command line interface "import" subcommand. Functions 

1545 # in the script folder are generally considered protected and 

1546 # should not be used as public api. 

1547 with open(exportFile) as f: 

1548 script.butlerImport( 

1549 importDir, 

1550 export_file=f, 

1551 directory=exportDir, 

1552 transfer="auto", 

1553 skip_dimensions=None, 

1554 ) 
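# For reference, a public-API equivalent of the call above would be
# roughly (a hedged sketch):
#
#     Butler.from_config(importDir, writeable=True).import_(
#         filename=exportFile, directory=exportDir, transfer="auto"
#     )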

1555 importButler = Butler.from_config(importDir, run=self.default_run) 

1556 for ref in datasets: 

1557 with self.subTest(ref=ref): 

1558 # Test for existence by passing in the DatasetType and 

1559 # data ID separately, to avoid lookup by dataset_id. 

1560 self.assertTrue(importButler.exists(ref.datasetType, ref.dataId)) 

1561 self.assertEqual( 

1562 list(importButler.registry.queryDimensionRecords("skymap")), 

1563 [importButler.dimensions["skymap"].RecordClass(**skymapRecord)], 

1564 ) 

1565 

1566 def testRemoveRuns(self) -> None: 

1567 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1568 butler = self.create_empty_butler(writeable=True) 

1569 # Load registry data with dimensions to hang datasets off of. 

1570 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1571 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1572 # Add some RUN-type collections. 

1573 run1 = "run1" 

1574 butler.registry.registerRun(run1) 

1575 run2 = "run2" 

1576 butler.registry.registerRun(run2) 

1577 # Put a dataset in each run. 

1578 metric = makeExampleMetrics() 

1579 dimensions = butler.dimensions.conform(["instrument", "physical_filter"]) 

1580 datasetType = self.addDatasetType( 

1581 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1582 ) 

1583 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1584 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1585 uri1 = butler.getURI(ref1) 

1586 uri2 = butler.getURI(ref2) 

1587 

1588 with self.assertRaises(OrphanedRecordError): 

1589 butler.registry.removeDatasetType(datasetType.name) 

1590 

1591 # Remove from both runs with different values for unstore. 

1592 butler.removeRuns([run1], unstore=True) 

1593 butler.removeRuns([run2], unstore=False) 

1594 # Should be nothing in registry for either one, and datastore should 

1595 # not think either exists. 

1596 with self.assertRaises(MissingCollectionError): 

1597 butler.registry.getCollectionType(run1) 

1598 with self.assertRaises(MissingCollectionError): 

1599 butler.registry.getCollectionType(run2) 

1600 self.assertFalse(butler.stored(ref1)) 

1601 self.assertFalse(butler.stored(ref2)) 

1602 # The ref we unstored should be gone according to the URI, but the 

1603 # one we forgot should still be around. 

1604 self.assertFalse(uri1.exists()) 

1605 self.assertTrue(uri2.exists()) 

1606 

1607 # Now that the collections have been pruned, we can remove the 

1608 # dataset type. 

1609 butler.registry.removeDatasetType(datasetType.name) 

1610 

1611 with self.assertLogs("lsst.daf.butler.registry", "INFO") as cm: 

1612 butler.registry.removeDatasetType(("test*", "test*")) 

1613 self.assertIn("not defined", "\n".join(cm.output)) 

1614 

1615 def remove_dataset_out_of_band(self, butler: Butler, ref: DatasetRef) -> None: 

1616 """Simulate an external actor removing a file outside of Butler's 

1617 knowledge. 

1618 

1619 Subclasses may override to handle more complicated datastore 

1620 configurations. 

1621 """ 

1622 uri = butler.getURI(ref) 

1623 uri.remove() 

1624 datastore = cast(FileDatastore, butler._datastore) 

1625 datastore.cacheManager.remove_from_cache(ref) 
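# ButlerServerTests below overrides this to delete through the
# DirectButler instance, since S3 signed URLs cannot be used for
# deletion.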

1626 

1627 def testPruneDatasets(self) -> None: 

1628 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1629 butler = self.create_empty_butler(writeable=True) 

1630 # Load registry data with dimensions to hang datasets off of. 

1631 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1632 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1633 # Add some RUN-type collections. 

1634 run1 = "run1" 

1635 butler.registry.registerRun(run1) 

1636 run2 = "run2" 

1637 butler.registry.registerRun(run2) 

1638 # Put some datasets. ref1 and ref2 have the same data ID, and are in 

1639 # different runs. ref3 has a different data ID. 

1640 metric = makeExampleMetrics() 

1641 dimensions = butler.dimensions.conform(["instrument", "physical_filter"]) 

1642 datasetType = self.addDatasetType( 

1643 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1644 ) 

1645 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1646 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1647 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1648 

1649 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1650 for ref, stored in many_stored.items(): 

1651 self.assertTrue(stored, f"Ref {ref} should be stored") 

1652 

1653 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1654 for ref, exists in many_exists.items(): 

1655 self.assertTrue(exists, f"Checking ref {ref} exists.") 

1656 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored") 

1657 

1658 # Simple prune. 

1659 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1660 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1)) 

1661 

1662 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1663 for ref, stored in many_stored.items(): 

1664 self.assertFalse(stored, f"Ref {ref} should not be stored") 

1665 

1666 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1667 for ref, exists in many_exists.items(): 

1668 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored") 

1669 

1670 # Put data back. 

1671 ref1_new = butler.put(metric, ref1) 

1672 self.assertEqual(ref1_new, ref1) # Reuses original ID. 

1673 ref2 = butler.put(metric, ref2) 

1674 

1675 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1676 self.assertTrue(many_stored[ref1]) 

1677 self.assertTrue(many_stored[ref2]) 

1678 self.assertFalse(many_stored[ref3]) 

1679 

1680 ref3 = butler.put(metric, ref3) 

1681 

1682 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1683 for ref, exists in many_exists.items(): 

1684 self.assertTrue(exists, f"Ref {ref} should be stored") 

1685 

1686 # Clear out the datasets from registry and start again. 

1687 refs = [ref1, ref2, ref3] 

1688 butler.pruneDatasets(refs, purge=True, unstore=True) 

1689 for ref in refs: 

1690 butler.put(metric, ref) 

1691 

1692 # Confirm we can retrieve deferred. 

1693 dref1 = butler.getDeferred(ref1) # known and exists 

1694 metric1 = dref1.get() 

1695 self.assertEqual(metric1, metric) 

1696 

1697 # Test different forms of file availability. 

1698 # Need to be in a state where: 

1699 # - one ref just has registry record. 

1700 # - one ref has a missing file but a datastore record. 

1701 # - one ref has a missing datastore record but file is there. 

1702 # - one ref does not exist anywhere. 

1703 # Do not need to test a ref that has everything since that is tested 

1704 # above. 

1705 ref0 = DatasetRef( 

1706 datasetType, 

1707 DataCoordinate.standardize( 

1708 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions 

1709 ), 

1710 run=run1, 

1711 ) 

1712 

1713 # Delete from datastore and retain in Registry. 

1714 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False) 

1715 

1716 # File has been removed. 

1717 self.remove_dataset_out_of_band(butler, ref2) 

1718 

1719 # Datastore has lost track. 

1720 butler._datastore.forget([ref3]) 

1721 

1722 # First test with a standard butler. 

1723 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1724 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1725 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1726 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1727 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED) 

1728 

1729 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False) 

1730 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1731 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1732 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN) 

1733 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1734 self.assertTrue(exists_many[ref2]) 

1735 

1736 # Check that a per-ref query gives the same answer as the many-ref query. 

1737 for ref, exists in exists_many.items(): 

1738 self.assertEqual(butler.exists(ref, full_check=False), exists) 

1739 

1740 # getDeferred checks for existence before it allows the dataset 

1741 # to be retrieved. 

1742 with self.assertRaises(LookupError): 

1743 butler.getDeferred(ref3) # not known, file exists 

1744 dref2 = butler.getDeferred(ref2) # known but file missing 

1745 with self.assertRaises(FileNotFoundError): 

1746 dref2.get() 

1747 

1748 # Test again with a trusting butler. 

1749 if self.trustModeSupported: 

1750 butler._datastore.trustGetRequest = True 

1751 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1752 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1753 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1754 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1755 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT) 

1756 

1757 # When trusting we can get a deferred dataset handle that is not 

1758 # known but does exist. 

1759 dref3 = butler.getDeferred(ref3) 

1760 metric3 = dref3.get() 

1761 self.assertEqual(metric3, metric) 

1762 

1763 # Check that a per-ref query gives the same answer as the many-ref query. 

1764 for ref, exists in exists_many.items(): 

1765 self.assertEqual(butler.exists(ref, full_check=True), exists) 

1766 

1767 # Create a ref that surprisingly has the UUID of an existing ref 

1768 # but is not the same. 

1769 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id) 

1770 with self.assertRaises(ValueError): 

1771 butler.exists(ref_bad) 

1772 

1773 # Create a ref that has a compatible storage class. 

1774 ref_compat = ref2.overrideStorageClass("StructuredDataDict") 

1775 exists = butler.exists(ref_compat) 

1776 self.assertEqual(exists, exists_many[ref2]) 

1777 

1778 # Remove everything and start from scratch. 

1779 butler._datastore.trustGetRequest = False 

1780 butler.pruneDatasets(refs, purge=True, unstore=True) 

1781 for ref in refs: 

1782 butler.put(metric, ref) 

1783 

1784 # These tests mess directly with the trash table and can leave the 

1785 # datastore in an odd state. Do them at the end. 

1786 # Check that in normal mode, deleting the record will lead to 

1787 # trash not touching the file. 

1788 uri1 = butler.getURI(ref1) 

1789 butler._datastore.bridge.moveToTrash( 

1790 [ref1], transaction=None 

1791 ) # Update the dataset_location table 

1792 butler._datastore.forget([ref1]) 

1793 butler._datastore.trash(ref1) 

1794 butler._datastore.emptyTrash() 

1795 self.assertTrue(uri1.exists()) 

1796 uri1.remove() # Clean it up. 

1797 

1798 # Simulate execution butler setup by deleting the datastore 

1799 # record but keeping the file around and trusting. 

1800 butler._datastore.trustGetRequest = True 

1801 uris = butler.get_many_uris([ref2, ref3]) 

1802 uri2 = uris[ref2].primaryURI 

1803 uri3 = uris[ref3].primaryURI 

1804 self.assertTrue(uri2.exists()) 

1805 self.assertTrue(uri3.exists()) 

1806 

1807 # Remove the datastore record. 

1808 butler._datastore.bridge.moveToTrash( 

1809 [ref2], transaction=None 

1810 ) # Update the dataset_location table 

1811 butler._datastore.forget([ref2]) 

1812 self.assertTrue(uri2.exists()) 

1813 butler._datastore.trash([ref2, ref3]) 

1814 # Immediate removal for ref2 file 

1815 self.assertFalse(uri2.exists()) 

1816 # But ref3 has to wait for the empty. 

1817 self.assertTrue(uri3.exists()) 

1818 butler._datastore.emptyTrash() 

1819 self.assertFalse(uri3.exists()) 

1820 

1821 # Clear out the datasets from registry. 

1822 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1823 

1824 

1825class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1826 """PosixDatastore specialization of a butler""" 

1827 

1828 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1829 fullConfigKey: str | None = ".datastore.formatters" 

1830 validationCanFail = True 

1831 datastoreStr = ["/tmp"] 

1832 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1833 registryStr = "/gen3.sqlite3" 

1834 

1835 def testPathConstructor(self) -> None: 

1836 """Independent test of constructor using PathLike.""" 

1837 butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) 

1838 self.assertIsInstance(butler, Butler) 

1839 

1840 # And again with a Path object with the butler yaml 

1841 path = pathlib.Path(self.tmpConfigFile) 

1842 butler = Butler.from_config(path, writeable=False) 

1843 self.assertIsInstance(butler, Butler) 

1844 

1845 # And again with a Path object without the butler yaml 

1846 # (making sure we skip it if the tmp config doesn't end 

1847 # in butler.yaml -- which is the case for a subclass) 

1848 if self.tmpConfigFile.endswith("butler.yaml"): 

1849 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1850 butler = Butler.from_config(path, writeable=False) 

1851 self.assertIsInstance(butler, Butler) 

1852 

1853 def testExportTransferCopy(self) -> None: 

1854 """Test local export using all transfer modes""" 

1855 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1856 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1857 # Test that the repo actually has at least one dataset. 

1858 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1859 self.assertGreater(len(datasets), 0) 

1860 uris = [exportButler.getURI(d) for d in datasets] 

1861 assert isinstance(exportButler._datastore, FileDatastore) 

1862 datastoreRoot = exportButler.get_datastore_roots()[exportButler.get_datastore_names()[0]] 

1863 

1864 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1865 

1866 for path in pathsInStore: 

1867 # Assume local file system 

1868 assert path is not None 

1869 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1870 

1871 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1872 with safeTestTempDir(TESTDIR) as exportDir: 

1873 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1874 export.saveDatasets(datasets) 

1875 for path in pathsInStore: 

1876 assert path is not None 

1877 self.assertTrue( 

1878 self.checkFileExists(exportDir, path), 

1879 f"Check that mode {transfer} exported files", 

1880 ) 

1881 

1882 def testPytypeCoercion(self) -> None: 

1883 """Test python type coercion on Butler.get and put.""" 

1884 # Store some data with the normal example storage class. 

1885 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1886 datasetTypeName = "test_metric" 

1887 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1888 

1889 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1890 metric = butler.get(datasetTypeName, dataId=dataId) 

1891 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1892 

1893 datasetType_ori = butler.get_dataset_type(datasetTypeName) 

1894 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1895 

1896 # Now need to hack the registry dataset type definition. 

1897 # There is no API for this. 

1898 assert isinstance(butler._registry, SqlRegistry) 

1899 manager = butler._registry._managers.datasets 

1900 assert hasattr(manager, "_db") and hasattr(manager, "_static") 

1901 manager._db.update( 

1902 manager._static.dataset_type, 

1903 {"name": datasetTypeName}, 

1904 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1905 ) 
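# Note: the update's "where" mapping ties the "name" column to the row
# key named by datasetTypeName, which is why the row dict supplies that
# odd-looking entry alongside the new storage_class value.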

1906 

1907 # Force reset of dataset type cache 

1908 butler.registry.refresh() 

1909 

1910 datasetType_new = butler.get_dataset_type(datasetTypeName) 

1911 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1912 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1913 

1914 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1915 self.assertNotEqual(type(metric_model), type(metric)) 

1916 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1917 

1918 # Put the model and read it back to show that everything now 

1919 # works as normal. 

1920 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1921 metric_model_new = butler.get(metric_ref) 

1922 self.assertEqual(metric_model_new, metric_model) 

1923 

1924 # Hack the storage class again to something that will fail on the 

1925 # get with no conversion class. 

1926 manager._db.update( 

1927 manager._static.dataset_type, 

1928 {"name": datasetTypeName}, 

1929 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1930 ) 

1931 butler.registry.refresh() 

1932 

1933 with self.assertRaises(ValueError): 

1934 butler.get(datasetTypeName, dataId=dataId) 

1935 

1936 

1937@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1938class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1939 """PosixDatastore specialization of a butler using Postgres""" 

1940 

1941 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1942 fullConfigKey = ".datastore.formatters" 

1943 validationCanFail = True 

1944 datastoreStr = ["/tmp"] 

1945 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1946 registryStr = "PostgreSQL@test" 

1947 postgresql: Any 

1948 

1949 @staticmethod 

1950 def _handler(postgresql: Any) -> None: 

1951 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1952 with engine.begin() as connection: 

1953 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1954 

1955 @classmethod 

1956 def setUpClass(cls) -> None: 

1957 # Create the postgres test server. 

1958 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1959 cache_initialized_db=True, on_initialized=cls._handler 

1960 ) 

1961 super().setUpClass() 

1962 

1963 @classmethod 

1964 def tearDownClass(cls) -> None: 

1965 # Clean up any lingering SQLAlchemy engines/connections 

1966 # so they're closed before we shut down the server. 

1967 gc.collect() 

1968 cls.postgresql.clear_cache() 

1969 super().tearDownClass() 

1970 

1971 def setUp(self) -> None: 

1972 self.server = self.postgresql() 

1973 

1974 # Need to add a registry section to the config. 

1975 self._temp_config = False 

1976 config = Config(self.configFile) 

1977 config["registry", "db"] = self.server.url() 

1978 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1979 config.dump(fh) 

1980 self.configFile = fh.name 

1981 self._temp_config = True 

1982 super().setUp() 

1983 

1984 def tearDown(self) -> None: 

1985 self.server.stop() 

1986 if self._temp_config and os.path.exists(self.configFile): 

1987 os.remove(self.configFile) 

1988 super().tearDown() 

1989 

1990 def testMakeRepo(self) -> None: 

1991 # The base class test assumes that it's using sqlite and that the 

1992 # config file is acceptable to sqlite. 

1993 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1994 

1995 

1996@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1997class ClonedPostgresPosixDatastoreButlerTestCase(PostgresPosixDatastoreButlerTestCase, unittest.TestCase): 

1998 """Test that Butler with a Postgres registry still works after cloning.""" 

1999 

2000 def create_butler( 

2001 self, run: str, storageClass: StorageClass | str, datasetTypeName: str 

2002 ) -> tuple[DirectButler, DatasetType]: 

2003 butler, datasetType = super().create_butler(run, storageClass, datasetTypeName) 

2004 return butler._clone(run=run), datasetType 

2005 

2006 

2007class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

2008 """InMemoryDatastore specialization of a butler""" 

2009 

2010 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

2011 fullConfigKey = None 

2012 useTempRoot = False 

2013 validationCanFail = False 

2014 datastoreStr = ["datastore='InMemory"] 

2015 datastoreName = ["InMemoryDatastore@"] 

2016 registryStr = "/gen3.sqlite3" 

2017 

2018 def testIngest(self) -> None: 

2019 pass 

2020 

2021 

2022class ClonedSqliteButlerTestCase(InMemoryDatastoreButlerTestCase, unittest.TestCase): 

2023 """Test that a Butler with a Sqlite registry still works after cloning.""" 

2024 

2025 def create_butler( 

2026 self, run: str, storageClass: StorageClass | str, datasetTypeName: str 

2027 ) -> tuple[DirectButler, DatasetType]: 

2028 butler, datasetType = super().create_butler(run, storageClass, datasetTypeName) 

2029 return butler._clone(run=run), datasetType 

2030 

2031 

2032class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

2033 """PosixDatastore specialization""" 

2034 

2035 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2036 fullConfigKey = ".datastore.datastores.1.formatters" 

2037 validationCanFail = True 

2038 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

2039 datastoreName = [ 

2040 "InMemoryDatastore@", 

2041 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

2042 "SecondDatastore", 

2043 ] 

2044 registryStr = "/gen3.sqlite3" 

2045 

2046 def testPruneDatasets(self) -> None: 

2047 # This test relies on manipulating files out-of-band, which is 

2048 # impossible for this configuration because of the InMemoryDatastore in 

2049 # the ChainedDatastore. 

2050 pass 

2051 

2052 

2053class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

2054 """Test that a yaml file in one location can refer to a root in another.""" 

2055 

2056 datastoreStr = ["dir1"] 

2057 # Disable the makeRepo test since we are deliberately not using 

2058 # butler.yaml as the config name. 

2059 fullConfigKey = None 

2060 

2061 def setUp(self) -> None: 

2062 self.root = makeTestTempDir(TESTDIR) 

2063 

2064 # Make a new repository in one place 

2065 self.dir1 = os.path.join(self.root, "dir1") 

2066 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

2067 

2068 # Move the yaml file to a different place and add a "root" 

2069 self.dir2 = os.path.join(self.root, "dir2") 

2070 os.makedirs(self.dir2, exist_ok=True) 

2071 configFile1 = os.path.join(self.dir1, "butler.yaml") 

2072 config = Config(configFile1) 

2073 config["root"] = self.dir1 

2074 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

2075 config.dumpToUri(configFile2) 

2076 os.remove(configFile1) 

2077 self.tmpConfigFile = configFile2 

2078 

2079 def testFileLocations(self) -> None: 

2080 self.assertNotEqual(self.dir1, self.dir2) 

2081 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

2082 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

2083 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

2084 

2085 

2086class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

2087 """Test that a config file created by makeRepo outside of repo works.""" 

2088 

2089 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2090 

2091 def setUp(self) -> None: 

2092 self.root = makeTestTempDir(TESTDIR) 

2093 self.root2 = makeTestTempDir(TESTDIR) 

2094 

2095 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

2096 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

2097 

2098 def tearDown(self) -> None: 

2099 if os.path.exists(self.root2): 

2100 shutil.rmtree(self.root2, ignore_errors=True) 

2101 super().tearDown() 

2102 

2103 def testConfigExistence(self) -> None: 

2104 c = Config(self.tmpConfigFile) 

2105 uri_config = ResourcePath(c["root"]) 

2106 uri_expected = ResourcePath(self.root, forceDirectory=True) 

2107 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

2108 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

2109 

2110 def testPutGet(self) -> None: 

2111 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

2112 self.runPutGetTest(storageClass, "test_metric") 

2113 

2114 

2115class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

2116 """Test that a config file created by makeRepo outside of repo works.""" 

2117 

2118 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2119 

2120 def setUp(self) -> None: 

2121 self.root = makeTestTempDir(TESTDIR) 

2122 self.root2 = makeTestTempDir(TESTDIR) 

2123 

2124 self.tmpConfigFile = self.root2 

2125 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

2126 

2127 def testConfigExistence(self) -> None: 

2128 # Append the yaml file name, else the Config constructor does not 

2129 # know the file type. 

2130 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

2131 super().testConfigExistence() 

2132 

2133 

2134class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

2135 """Test that a config file created by makeRepo outside of repo works.""" 

2136 

2137 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2138 

2139 def setUp(self) -> None: 

2140 self.root = makeTestTempDir(TESTDIR) 

2141 self.root2 = makeTestTempDir(TESTDIR) 

2142 

2143 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

2144 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

2145 

2146 

2147@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

2148class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

2149 """S3Datastore specialization of a butler; an S3 storage Datastore + 

2150 a local SQLite SqlRegistry. 

2151 """ 

2152 

2153 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

2154 fullConfigKey = None 

2155 validationCanFail = True 

2156 

2157 bucketName = "anybucketname" 

2158 """Name of the Bucket that will be used in the tests. The name is read from 

2159 the config file used with the tests during set-up. 

2160 """ 

2161 

2162 root = "butlerRoot/" 

2163 """Root repository directory expected to be used in case useTempRoot=False. 

2164 Otherwise the root is set to a randomly generated 20-character string 

2165 during set-up. 

2166 """ 

2167 

2168 datastoreStr = [f"datastore={root}"] 

2169 """Contains all expected root locations in a format expected to be 

2170 returned by Butler stringification. 

2171 """ 

2172 

2173 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

2174 """The expected format of the S3 Datastore string.""" 

2175 

2176 registryStr = "/gen3.sqlite3" 

2177 """Expected format of the Registry string.""" 

2178 

2179 mock_aws = mock_aws() 

2180 """The mocked s3 interface from moto.""" 

2181 

2182 def genRoot(self) -> str: 

2183 """Return a random string of len 20 to serve as a root 

2184 name for the temporary bucket repo. 

2185 

2186 This is equivalent to tempfile.mkdtemp as this is what self.root 

2187 becomes when useTempRoot is True. 

2188 """ 

2189 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

2190 return rndstr + "/" 

2191 

2192 def setUp(self) -> None: 

2193 config = Config(self.configFile) 

2194 uri = ResourcePath(config[".datastore.datastore.root"]) 

2195 self.bucketName = uri.netloc 

2196 

2197 # Enable S3 mocking of tests. 

2198 self.enterContext(clean_test_environment_for_s3()) 

2199 self.mock_aws.start() 

2200 

2201 if self.useTempRoot: 

2202 self.root = self.genRoot() 

2203 rooturi = f"s3://{self.bucketName}/{self.root}" 

2204 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

2205 

2206 # Need a local folder to store the registry database. 

2207 self.reg_dir = makeTestTempDir(TESTDIR) 

2208 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

2209 

2210 # Moto needs to know that we expect the bucket self.bucketName to 

2211 # exist (this used to be the class attribute bucketName). 

2212 s3 = boto3.resource("s3") 

2213 s3.create_bucket(Bucket=self.bucketName) 

2214 

2215 self.datastoreStr = [f"datastore='{rooturi}'"] 

2216 self.datastoreName = [f"FileDatastore@{rooturi}"] 

2217 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

2218 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

2219 

2220 def tearDown(self) -> None: 

2221 s3 = boto3.resource("s3") 

2222 bucket = s3.Bucket(self.bucketName) 

2223 try: 

2224 bucket.objects.all().delete() 

2225 except botocore.exceptions.ClientError as e: 

2226 if e.response["Error"]["Code"] == "404": 

2227 # the key was not reachable - pass 

2228 pass 

2229 else: 

2230 raise 

2231 

2232 bucket = s3.Bucket(self.bucketName) 

2233 bucket.delete() 

2234 

2235 # Stop the S3 mock. 

2236 self.mock_aws.stop() 

2237 

2238 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

2239 shutil.rmtree(self.reg_dir, ignore_errors=True) 

2240 

2241 if self.useTempRoot and os.path.exists(self.root): 

2242 shutil.rmtree(self.root, ignore_errors=True) 

2243 

2244 super().tearDown() 

2245 

2246 

2247class PosixDatastoreTransfers(unittest.TestCase): 

2248 """Test data transfers between butlers. 

2249 

2250 Test for different managers. UUID to UUID and integer to integer are 

2251 tested. UUID to integer is not supported since we do not currently 

2252 want to allow that. Integer to UUID is supported with the caveat 

2253 that UUID4 will be generated and this will be incorrect for raw 

2254 dataset types. The test ignores that. 

2255 """ 

2256 

2257 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2258 storageClassFactory: StorageClassFactory 

2259 

2260 @classmethod 

2261 def setUpClass(cls) -> None: 

2262 cls.storageClassFactory = StorageClassFactory() 

2263 cls.storageClassFactory.addFromConfig(cls.configFile) 

2264 

2265 def setUp(self) -> None: 

2266 self.root = makeTestTempDir(TESTDIR) 

2267 self.config = Config(self.configFile) 

2268 

2269 def tearDown(self) -> None: 

2270 removeTestTempDir(self.root) 

2271 

2272 def create_butler(self, manager: str, label: str) -> Butler: 

2273 config = Config(self.configFile) 

2274 config["registry", "managers", "datasets"] = manager 

2275 return Butler.from_config( 

2276 Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True 

2277 ) 
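# Overriding registry.managers.datasets is what lets the two butlers in
# a pair use different dataset-ID managers.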

2278 

2279 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None: 

2280 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID" 

2281 if manager1 is None: 

2282 manager1 = default 

2283 if manager2 is None: 

2284 manager2 = default 

2285 self.source_butler = self.create_butler(manager1, "1") 

2286 self.target_butler = self.create_butler(manager2, "2") 

2287 

2288 def testTransferUuidToUuid(self) -> None: 

2289 self.create_butlers() 

2290 self.assertButlerTransfers() 

2291 

2292 def testTransferMissing(self) -> None: 

2293 """Test transfers where datastore records are missing. 

2294 

2295 This is how execution butler works. 

2296 """ 

2297 self.create_butlers() 

2298 

2299 # Configure the source butler to allow trust. 

2300 self.source_butler._datastore._set_trust_mode(True) 

2301 

2302 self.assertButlerTransfers(purge=True) 

2303 

2304 def testTransferMissingDisassembly(self) -> None: 

2305 """Test transfers where datastore records are missing. 

2306 

2307 This is how execution butler works. 

2308 """ 

2309 self.create_butlers() 

2310 

2311 # Configure the source butler to allow trust. 

2312 self.source_butler._datastore._set_trust_mode(True) 

2313 

2314 # Test disassembly. 

2315 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2316 

2317 def testAbsoluteURITransferDirect(self) -> None: 

2318 """Test transfer using an absolute URI.""" 

2319 self._absolute_transfer("auto") 

2320 

2321 def testAbsoluteURITransferCopy(self) -> None: 

2322 """Test transfer using an absolute URI.""" 

2323 self._absolute_transfer("copy") 

2324 

2325 def _absolute_transfer(self, transfer: str) -> None: 

2326 self.create_butlers() 

2327 

2328 storageClassName = "StructuredData" 

2329 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2330 datasetTypeName = "random_data" 

2331 run = "run1" 

2332 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2333 

2334 dimensions = self.source_butler.dimensions.conform(()) 

2335 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2336 self.source_butler.registry.registerDatasetType(datasetType) 

2337 

2338 metrics = makeExampleMetrics() 

2339 with ResourcePath.temporary_uri(suffix=".json") as temp: 

2340 dataId = DataCoordinate.make_empty(self.source_butler.dimensions) 

2341 source_refs = [DatasetRef(datasetType, dataId, run=run)] 

2342 temp.write(json.dumps(metrics.exportAsDict()).encode()) 

2343 dataset = FileDataset(path=temp, refs=source_refs) 

2344 self.source_butler.ingest(dataset, transfer="direct") 
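# With transfer="direct" the artifact stays at its absolute temporary
# URI; the datastore records that location instead of copying the file,
# which is what the assertions below probe.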

2345 

2346 self.target_butler.transfer_from( 

2347 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer 

2348 ) 

2349 

2350 uri = self.target_butler.getURI(dataset.refs[0]) 

2351 if transfer == "auto": 

2352 self.assertEqual(uri, temp) 

2353 else: 

2354 self.assertNotEqual(uri, temp) 

2355 

2356 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None: 

2357 """Test that a run can be transferred to another butler.""" 

2358 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2359 datasetTypeName = "random_data" 

2360 

2361 # The test will create 3 collections and we will want to transfer 

2362 # two of those three. 

2363 runs = ["run1", "run2", "other"] 

2364 

2365 # Also want to use two different dataset types to ensure that 

2366 # grouping works. 

2367 datasetTypeNames = ["random_data", "random_data_2"] 

2368 

2369 # Create the run collections in the source butler. 

2370 for run in runs: 

2371 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2372 

2373 # Create dimensions in source butler. 

2374 n_exposures = 30 

2375 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2376 self.source_butler.registry.insertDimensionData( 

2377 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2378 ) 

2379 self.source_butler.registry.insertDimensionData( 

2380 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2381 ) 

2382 self.source_butler.registry.insertDimensionData( 

2383 "day_obs", 

2384 { 

2385 "instrument": "DummyCamComp", 

2386 "id": 20250101, 

2387 }, 

2388 ) 

2389 

2390 for i in range(n_exposures): 

2391 self.source_butler.registry.insertDimensionData( 

2392 "group", {"instrument": "DummyCamComp", "name": f"group{i}"} 

2393 ) 

2394 self.source_butler.registry.insertDimensionData( 

2395 "exposure", 

2396 { 

2397 "instrument": "DummyCamComp", 

2398 "id": i, 

2399 "obs_id": f"exp{i}", 

2400 "physical_filter": "d-r", 

2401 "group": f"group{i}", 

2402 "day_obs": 20250101, 

2403 }, 

2404 ) 

2405 

2406 # Create dataset types in the source butler. 

2407 dimensions = self.source_butler.dimensions.conform(["instrument", "exposure"]) 

2408 for datasetTypeName in datasetTypeNames: 

2409 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2410 self.source_butler.registry.registerDatasetType(datasetType) 

2411 

2412 # Write a dataset to an unrelated run -- this will ensure that 

2413 # we are rewriting integer dataset ids in the target if necessary. 

2414 # Will not be relevant for UUID. 

2415 run = "distraction" 

2416 butler = Butler.from_config(butler=self.source_butler, run=run) 
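# Passing butler= reuses the existing butler's configuration, here with
# a different default run.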

2417 butler.put( 

2418 makeExampleMetrics(), 

2419 datasetTypeName, 

2420 exposure=1, 

2421 instrument="DummyCamComp", 

2422 physical_filter="d-r", 

2423 ) 

2424 

2425 # Write some example metrics to the source 

2426 butler = Butler.from_config(butler=self.source_butler) 

2427 

2428 # Set of DatasetRefs that should be in the list of refs to transfer 

2429 # but which will not be transferred. 

2430 deleted: set[DatasetRef] = set() 

2431 

2432 n_expected = 20 # Number of datasets expected to be transferred 

2433 source_refs = [] 

2434 for i in range(n_exposures): 

2435 # Put a third of the datasets into each collection; only retain 

2436 # two thirds. 

2437 index = i % 3 

2438 run = runs[index] 

2439 datasetTypeName = datasetTypeNames[i % 2] 

2440 

2441 metric = MetricsExample( 

2442 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)] 

2443 ) 

2444 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2445 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2446 

2447 # Remove the datastore record using low-level API, but only 

2448 # for a specific index. 

2449 if purge and index == 1: 

2450 # For one of these delete the file as well. 

2451 # This allows the "missing" code to filter the 

2452 # file out. 

2453 # Access the individual datastores. 

2454 datastores = [] 

2455 if hasattr(butler._datastore, "datastores"): 

2456 datastores.extend(butler._datastore.datastores) 

2457 else: 

2458 datastores.append(butler._datastore) 

2459 

2460 if not deleted: 

2461 # For a chained datastore we need to remove 

2462 # files in each chain. 

2463 for datastore in datastores: 

2464 # The file might not be known to the datastore 

2465 # if constraints are used. 

2466 try: 

2467 primary, uris = datastore.getURIs(ref) 

2468 except FileNotFoundError: 

2469 continue 

2470 if primary and primary.scheme != "mem": 

2471 primary.remove() 

2472 for uri in uris.values(): 

2473 if uri.scheme != "mem": 

2474 uri.remove() 

2475 n_expected -= 1 

2476 deleted.add(ref) 

2477 

2478 # Remove the datastore record. 

2479 for datastore in datastores: 

2480 if hasattr(datastore, "removeStoredItemInfo"): 

2481 datastore.removeStoredItemInfo(ref) 

2482 

2483 if index < 2: 

2484 source_refs.append(ref) 

2485 if ref not in deleted: 

2486 new_metric = butler.get(ref) 

2487 self.assertEqual(new_metric, metric) 

2488 

2489 # Create some bad dataset types to ensure we check for inconsistent 

2490 # definitions. 

2491 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2492 for datasetTypeName in datasetTypeNames: 

2493 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2494 self.target_butler.registry.registerDatasetType(datasetType) 

2495 with self.assertRaises(ConflictingDefinitionError) as cm: 

2496 self.target_butler.transfer_from(self.source_butler, source_refs) 

2497 self.assertIn("dataset type differs", str(cm.exception)) 

2498 

2499 # And remove the bad definitions. 

2500 for datasetTypeName in datasetTypeNames: 

2501 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2502 

2503 # Transfer without creating dataset types should fail. 

2504 with self.assertRaises(KeyError): 

2505 self.target_butler.transfer_from(self.source_butler, source_refs) 

2506 

2507 # Transfer without creating dimensions should fail. 

2508 with self.assertRaises(ConflictingDefinitionError) as cm: 

2509 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True) 

2510 self.assertIn("dimension", str(cm.exception)) 

2511 

2512 # The failed transfer above leaves registry in an inconsistent 

2513 # state because the run is created but then rolled back without 

2514 # the collection cache being cleared. For now force a refresh. 

2515 # Can remove with DM-35498. 

2516 self.target_butler.registry.refresh() 

2517 

2518 # Do a dry run -- this should not have any effect on the target butler. 

2519 self.target_butler.transfer_from(self.source_butler, source_refs, dry_run=True) 

2520 

2521 # Transfer the records for one ref to test the alternative API. 

2522 with self.assertLogs(logger="lsst", level=logging.DEBUG) as log_cm: 

2523 self.target_butler.transfer_dimension_records_from(self.source_butler, [source_refs[0]]) 

2524 self.assertIn("number of records transferred: 1", ";".join(log_cm.output)) 

2525 

2526 # Now transfer them to the second butler, including dimensions. 

2527 with self.assertLogs(logger="lsst", level=logging.DEBUG) as log_cm: 

2528 transferred = self.target_butler.transfer_from( 

2529 self.source_butler, 

2530 source_refs, 

2531 register_dataset_types=True, 

2532 transfer_dimensions=True, 

2533 ) 

2534 self.assertEqual(len(transferred), n_expected) 

2535 log_output = ";".join(log_cm.output) 

2536 

2537 # A ChainedDatastore will use the in-memory datastore for mexists 

2538 # so we can not rely on the mexists log message. 

2539 self.assertIn("Number of datastore records found in source", log_output) 

2540 self.assertIn("Creating output run", log_output) 

2541 

2542 # Do the transfer twice to ensure that it will do nothing extra. 

2543 # Only do this if purge=True because it does not work for int 

2544 # dataset_id. 

2545 if purge: 

2546 # This should not need to register dataset types. 

2547 transferred = self.target_butler.transfer_from(self.source_butler, source_refs) 

2548 self.assertEqual(len(transferred), n_expected) 

2549 

2550 # Also do an explicit low-level transfer to trigger some 

2551 # edge cases. 

2552 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2553 self.target_butler._datastore.transfer_from(self.source_butler._datastore, source_refs) 

2554 log_output = ";".join(log_cm.output) 

2555 self.assertIn("no file artifacts exist", log_output) 

2556 

2557 with self.assertRaises((TypeError, AttributeError)): 

2558 self.target_butler._datastore.transfer_from(self.source_butler, source_refs) # type: ignore 

2559 

2560 with self.assertRaises(ValueError): 

2561 self.target_butler._datastore.transfer_from( 

2562 self.source_butler._datastore, source_refs, transfer="split" 

2563 ) 

2564 

2565 # Now try to get the same refs from the new butler. 

2566 for ref in source_refs: 

2567 if ref not in deleted: 

2568 new_metric = self.target_butler.get(ref) 

2569 old_metric = self.source_butler.get(ref) 

2570 self.assertEqual(new_metric, old_metric) 

2571 

2572 # Now prune the run2 collection and create a CHAINED collection 

2573 # instead. This should block the transfer. 

2574 self.target_butler.removeRuns(["run2"], unstore=True) 

2575 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2576 with self.assertRaises(CollectionTypeError): 

2577 # Re-importing the run1 datasets can be problematic if they 

2578 # use integer IDs so filter those out. 

2579 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2580 self.target_butler.transfer_from(self.source_butler, to_transfer) 

2581 

2582 

2583class ChainedDatastoreTransfers(PosixDatastoreTransfers): 

2584 """Test transfers using a chained datastore.""" 

2585 

2586 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2587 

2588 

2589class NullDatastoreTestCase(unittest.TestCase): 

2590 """Test that we can fall back to a null datastore.""" 

2591 

2592 # Need a good config to create the repo. 

2593 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2594 storageClassFactory: StorageClassFactory 

2595 

2596 @classmethod 

2597 def setUpClass(cls) -> None: 

2598 cls.storageClassFactory = StorageClassFactory() 

2599 cls.storageClassFactory.addFromConfig(cls.configFile) 

2600 

2601 def setUp(self) -> None: 

2602 """Create a new butler root for each test.""" 

2603 self.root = makeTestTempDir(TESTDIR) 

2604 Butler.makeRepo(self.root, config=Config(self.configFile)) 

2605 

2606 def tearDown(self) -> None: 

2607 removeTestTempDir(self.root) 

2608 

2609 def test_fallback(self) -> None: 

2610 # Read the butler config and mess with the datastore section. 

2611 config_path = os.path.join(self.root, "butler.yaml") 

2612 bad_config = Config(config_path) 

2613 bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore" 

2614 bad_config.dumpToUri(config_path) 
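# With the datastore class now unimportable, construction should fail
# unless the datastore is skipped entirely.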

2615 

2616 with self.assertRaises(RuntimeError): 

2617 Butler(self.root, without_datastore=False) 

2618 

2619 with self.assertRaises(RuntimeError): 

2620 Butler.from_config(self.root, without_datastore=False) 

2621 

2622 butler = Butler.from_config(self.root, writeable=True, without_datastore=True) 

2623 self.assertIsInstance(butler._datastore, NullDatastore) 

2624 

2625 # Check that registry is working. 

2626 butler.registry.registerRun("MYRUN") 

2627 collections = butler.registry.queryCollections(...) 

2628 self.assertIn("MYRUN", set(collections)) 

2629 

2630 # Create a ref. 

2631 dimensions = butler.dimensions.conform([]) 

2632 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

2633 datasetTypeName = "metric" 

2634 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2635 butler.registry.registerDatasetType(datasetType) 

2636 ref = DatasetRef(datasetType, {}, run="MYRUN") 

2637 

2638 # Check that datastore will complain. 

2639 with self.assertRaises(FileNotFoundError): 

2640 butler.get(ref) 

2641 with self.assertRaises(FileNotFoundError): 

2642 butler.getURI(ref) 

2643 

2644 

2645@unittest.skipIf(create_test_server is None, "Server dependencies not installed.") 

2646class ButlerServerTests(FileDatastoreButlerTests, unittest.TestCase): 

2647 """Test RemoteButler and Butler server.""" 

2648 

2649 configFile = None 

2650 predictionSupported = False 

2651 trustModeSupported = False 

2652 

2653 def setUp(self) -> None: 

2654 self.server_instance = self.enterContext(create_test_server(TESTDIR)) 

2655 

2656 def tearDown(self) -> None: 

2657 pass 

2658 

2659 def are_uris_equivalent(self, uri1: ResourcePath, uri2: ResourcePath) -> bool: 

2660 # S3 pre-signed URLs may end up with differing expiration times in the 

2661 # query parameters, so ignore query parameters when comparing. 

2662 return uri1.scheme == uri2.scheme and uri1.netloc == uri2.netloc and uri1.path == uri2.path 

2663 

2664 def create_empty_butler(self, run: str | None = None, writeable: bool | None = None) -> Butler: 

2665 return self.server_instance.hybrid_butler._clone(run=run) 

2666 

2667 def remove_dataset_out_of_band(self, butler: Butler, ref: DatasetRef) -> None: 

2668 # Can't delete a file via S3 signed URLs, so we need to reach in 

2669 # through DirectButler to delete the dataset. 

2670 uri = self.server_instance.direct_butler.getURI(ref) 

2671 uri.remove() 

2672 

2673 def testConstructor(self) -> None: 

2674 # RemoteButler constructor is tested in test_server.py and 

2675 # test_remote_butler.py. 

2676 pass 

2677 

2678 def testDafButlerRepositories(self) -> None: 

2679 # Loading of RemoteButler via repository index is tested in 

2680 # test_server.py. 

2681 pass 

2682 

2683 def testGetDatasetTypes(self) -> None: 

2684 # This is mostly a test of validateConfiguration, which is for 

2685 # validating Datastore configuration and thus isn't relevant to 

2686 # RemoteButler. 

2687 pass 

2688 

2689 def testMakeRepo(self) -> None: 

2690 # Only applies to DirectButler. 

2691 pass 

2692 

2693 # Pickling not yet implemented for RemoteButler/HybridButler. 

2694 @unittest.expectedFailure 

2695 def testPickle(self) -> None: 

2696 return super().testPickle() 

2697 

2698 def testStringification(self) -> None: 

2699 self.assertEqual( 

2700 str(self.server_instance.remote_butler), 

2701 "RemoteButler(https://test.example/api/butler/repo/testrepo)", 

2702 ) 

2703 

2704 def testTransaction(self) -> None: 

2705 # Transactions will never be supported for RemoteButler. 

2706 pass 

2707 

2708 def testPutTemplates(self) -> None: 

2709 # The Butler server instance is configured with different file naming 

2710 # templates than this test is expecting. 

2711 pass 

2712 

2713 

2714def setup_module(module: types.ModuleType) -> None: 

2715 """Set up the module for pytest.""" 

2716 clean_environment() 

2717 

2718 

2719if __name__ == "__main__": 

2720 clean_environment() 

2721 unittest.main()