Coverage for tests/test_butler.py: 15% (1415 statements)

coverage.py v7.4.4, created at 2024-04-04 02:55 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

from __future__ import annotations

import gc
import json
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock
import uuid
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, cast
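
# Optional test dependencies are imported defensively below so that the suite
# can degrade gracefully when an extra (boto3/moto, the butler test server,
# or testing.postgresql) is not installed.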

try:
    import boto3
    import botocore
    from lsst.resources.s3utils import clean_test_environment_for_s3

    try:
        from moto import mock_aws  # v5
    except ImportError:
        from moto import mock_s3 as mock_aws
except ImportError:
    boto3 = None

    def mock_aws(*args: Any, **kwargs: Any) -> Any:  # type: ignore[no-untyped-def]
        """No-op decorator in case moto mock_aws can not be imported."""
        return None


try:
    from lsst.daf.butler.tests.server import create_test_server
except ImportError:
    create_test_server = None

try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    ButlerRepoIndex,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetExistence,
    DatasetNotFoundError,
    DatasetRef,
    DatasetType,
    FileDataset,
    NoDefaultCollectionError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.datastore import NullDatastore
from lsst.daf.butler.datastore.file_templates import FileTemplate, FileTemplateValidationError
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.direct_butler import DirectButler
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.registry.sql_registry import SqlRegistry
from lsst.daf.butler.repo_relocation import BUTLER_ROOT_TAG
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    import types

    from lsst.daf.butler import DimensionGroup, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def clean_environment() -> None:
    """Remove external environment variables that affect the tests."""
    for k in ("DAF_BUTLER_REPOSITORY_INDEX",):
        os.environ.pop(k, None)


def makeExampleMetrics() -> MetricsExample:
    """Return an example dataset suitable for tests."""
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in any other test
    cases.
    """

    def testSearchPath(self) -> None:
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests(TestCaseMixin):
    """Helper methods for running a suite of put/get tests from different
    butler configurations.
    """

    root: str
    default_run = "ingésτ😺"
    storageClassFactory: StorageClassFactory
    configFile: str | None
    tmpConfigFile: str

    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGroup, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        if cls.configFile is not None:
            cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(
        self,
        butler: Butler,
        datasetRef: DatasetRef,
        components: tuple[str, ...],
        reference: Any,
        collections: Any = None,
    ) -> None:
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self) -> None:
        if self.root is not None:
            removeTestTempDir(self.root)

    def create_empty_butler(self, run: str | None = None, writeable: bool | None = None) -> Butler:
        """Create a Butler for the test repository, without inserting test
        data.
        """
        butler = Butler.from_config(self.tmpConfigFile, run=run, writeable=writeable)
        assert isinstance(butler, DirectButler), "Expect DirectButler in configuration"
        return butler

    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[Butler, DatasetType]:
        """Create a Butler for the test repository and insert some test data
        into it.
        """
        butler = self.create_empty_butler(run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run})
        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        butler.registry.insertDimensionData("day_obs", {"instrument": "DummyCamComp", "id": 20200101})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
                "day_obs": 20200101,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "day_obs": 20200101,
                },
            )
        return butler, datasetType
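
    # create_butler() above seeds a shared fixture: instrument DummyCamComp
    # with physical_filter d-r and visits 423-425. The put/get tests below
    # rely on those records being present.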

    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Butler:
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Dataset should not exist if we haven't added it
        with self.assertRaises(DatasetNotFoundError):
            butler.get(datasetTypeName, dataId)

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]
        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                kwargs: dict[str, Any] = {}
                if not isinstance(args[0], DatasetRef):  # type: ignore
                    kwargs["run"] = this_run
                ref = butler.put(metric, *args, **kwargs)
                self.assertIsInstance(ref, DatasetRef)

                # Test get of a ref.
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get by dataset type name and dataId.
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a DatasetRef.
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                primary_uri, secondary_uris = butler.getURIs(ref)
                n_uris = len(secondary_uris)
                if primary_uri:
                    n_uris += 1

                # Can the artifacts themselves be retrieved?
                if not butler._datastore.isEphemeral:
                    # Create a temporary directory to hold the retrieved
                    # artifacts.
                    with tempfile.TemporaryDirectory(
                        prefix="butler-artifacts-", ignore_cleanup_errors=True
                    ) as artifact_root:
                        root_uri = ResourcePath(artifact_root, forceDirectory=True)

                        for preserve_path in (True, False):
                            destination = root_uri.join(f"{preserve_path}_{counter}/")
                            log = logging.getLogger("lsst.x")
                            log.warning("Using destination %s for args %s", destination, args)
                            # Use copy so that we can test that overwrite
                            # protection works (using "auto" for File URIs
                            # would use hard links and subsequent transfer
                            # would work because it knows they are the same
                            # file).
                            transferred = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, transfer="copy"
                            )
                            self.assertGreater(len(transferred), 0)
                            artifacts = list(ResourcePath.findFileResources([destination]))
                            self.assertEqual(set(transferred), set(artifacts))

                            for artifact in transferred:
                                path_in_destination = artifact.relative_to(destination)
                                self.assertIsNotNone(path_in_destination)
                                assert path_in_destination is not None

                                # When path is not preserved there should not
                                # be any path separators.
                                num_seps = path_in_destination.count("/")
                                if preserve_path:
                                    self.assertGreater(num_seps, 0)
                                else:
                                    self.assertEqual(num_seps, 0)

                            self.assertEqual(
                                len(artifacts),
                                n_uris,
                                "Comparing expected artifacts vs actual:"
                                f" {artifacts} vs {primary_uri} and {secondary_uris}",
                            )

                            if preserve_path:
                                # No need to run these twice
                                with self.assertRaises(ValueError):
                                    butler.retrieveArtifacts([ref], destination, transfer="move")

                                with self.assertRaisesRegex(
                                    ValueError, "^Destination location must refer to a directory"
                                ):
                                    butler.retrieveArtifacts(
                                        [ref], ResourcePath("/some/file.txt", forceDirectory=False)
                                    )

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                kwargs = {"collections": this_run}
                if isinstance(args[0], DatasetRef):
                    kwargs = {}  # Prevent warning from being issued.
                self.assertFalse(butler.exists(*args, **kwargs))
                # get() should still fail.
                with self.assertRaises((FileNotFoundError, DatasetNotFoundError)):
                    butler.get(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.get_dataset(ref.id))

                # Do explicit registry removal since we know the run
                # collection is empty.
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Create DatasetRef for put using default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Check that getDeferred fails with standalone ref.
        with self.assertRaises(LookupError):
            butler.getDeferred(refIn)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        assert metric.data is not None  # for mypy
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.find_dataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaisesRegex(
            ValueError,
            "(Supplied dataset type .* inconsistent with registry)"
            "|(The new storage class .* is not compatible with the existing storage class)",
        ):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises((FileNotFoundError, DatasetNotFoundError)):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add the same dataset again, so we can check that a duplicate put
        # fails.
        ref = butler.put(metric, datasetType, dataId)

        # Repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with a resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # In the case of a resolved ref this write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present.
        return butler

    def testDeferredCollectionPassing(self) -> None:
        # Construct a butler with no run or collection, but make it writeable.
        butler = self.create_empty_butler(writeable=True)
        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData("day_obs", {"instrument": "DummyCamComp", "id": 20250101})
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "day_obs": 20250101,
            },
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is an error.
        with self.assertRaises(NoDefaultCollectionError):
            butler.exists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""
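
    # Subclass configuration knobs, as used elsewhere in this file:
    # validationCanFail marks configurations whose validateConfiguration()
    # is expected to raise; fullConfigKey names a key present only in an
    # expanded ("standalone") config; registryStr, datastoreStr and
    # datastoreName are substrings expected in the butler's string forms.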

    useTempRoot = True
    validationCanFail: bool
    fullConfigKey: str | None
    registryStr: str | None
    datastoreName: list[str] | None
    datastoreStr: list[str]
    predictionSupported = True
    """Does getURIs support 'prediction mode'?"""

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def are_uris_equivalent(self, uri1: ResourcePath, uri2: ResourcePath) -> bool:
        """Return True if two URIs refer to the same resource.

        Subclasses may override to handle unique requirements.
        """
        return uri1 == uri2

    def testConstructor(self) -> None:
        """Independent test of constructor."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler.from_config(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler.from_config(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler.from_config(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler.from_config(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertEqual(type(butler._datastore), type(butler2._datastore))
        self.assertEqual(butler._datastore.config, butler2._datastore.config)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"file://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler.from_config(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler.from_config("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler.from_config("not_there", writeable=False)
                    with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"):
                        Butler.from_config("bad_label")
                    with self.assertRaises(FileNotFoundError):
                        # Should ignore aliases.
                        Butler.from_config(ResourcePath("label", forceAbsolute=False))
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(
                        Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False)
                    )
                    self.assertIn("not known to", str(cm.exception))
                    # Should report no failure.
                    self.assertEqual(ButlerRepoIndex.get_failure_reason(), "")
        with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
            # Now with empty configuration.
            butler_index = Config()
            butler_index.dumpToUri(temp_file)
            with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"):
                    Butler.from_config("label")
        with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
            # Now with bad contents.
            with open(temp_file.ospath, "w") as fh:
                print("'", file=fh)
            with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"):
                    Butler.from_config("label")
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())

            with self.assertRaisesRegex(FileNotFoundError, "index file not found"):
                Butler.from_config("label")

        # Check that we can create Butler when the alias file is not found.
        butler = Butler.from_config(self.tmpConfigFile, writeable=False)
        self.assertIsInstance(butler, Butler)
        with self.assertRaises(RuntimeError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"):
            # No aliases registered.
            Butler.from_config("not_there")
        self.assertEqual(Butler.get_known_repos(), set())
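
    # A repository index is a small YAML/JSON mapping from label to butler
    # config URI (e.g. "label: /path/to/butler.yaml"). testConstructor above
    # exercises lookup through a DAF_BUTLER_REPOSITORY_INDEX file; the next
    # test covers the inline DAF_BUTLER_REPOSITORIES variant.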

    def testDafButlerRepositories(self) -> None:
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_REPOSITORIES": "label: 'https://someuri.com'\notherLabel: 'https://otheruri.com'\n"},
        ):
            self.assertEqual(str(Butler.get_repo_uri("label")), "https://someuri.com")

        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_REPOSITORIES": "label: https://someuri.com",
                "DAF_BUTLER_REPOSITORY_INDEX": "https://someuri.com",
            },
        ):
            with self.assertRaisesRegex(RuntimeError, "Only one of the environment variables"):
                Butler.get_repo_uri("label")

        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_REPOSITORIES": "invalid"},
        ):
            with self.assertRaisesRegex(ValueError, "Repository index not in expected format"):
                Butler.get_repo_uri("label")

    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        if self.predictionSupported:
            dataId = {"instrument": "DummyCamComp", "visit": 424}
            uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
            self.assertFalse(components)
            self.assertIsInstance(uri, ResourcePath)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler._datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        if self.predictionSupported:
            # Predicted dataset
            dataId = {"instrument": "DummyCamComp", "visit": 424}
            uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

            if butler._datastore.isEphemeral:
                # Never disassembled
                self.assertIsInstance(uri, ResourcePath)
                self.assertFalse(components)
                self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
                self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
            else:
                self.assertIsNone(uri)
                self.assertEqual(set(components), set(storageClass.components))
                for compuri in components.values():
                    self.assertIsInstance(compuri, ResourcePath)
                    self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                    self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # A parameter defined in the write storage class should work
        # regardless of the read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.get_dataset_type(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the overridden dataset type definition, this time
        # passing a DatasetType rather than a DatasetRef. This should be
        # consistent with the ref-based get() behavior and return the python
        # type of the given DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self) -> None:
        butler = self.create_empty_butler(run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData("day_obs", {"instrument": "DummyCamComp", "id": 20250101})
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "day_obs": 20250101,
            },
            {
                "instrument": "DummyCamComp",
                "id": 424,
                "name": "fourtwentyfour",
                "physical_filter": "d-r",
                "day_obs": 20250101,
            },
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertFalse(self.are_uris_equivalent(uri1, uri2), f"Cf. {uri1} with {uri2}")

998 

999 # Now do a multi-dataset but single file ingest 

1000 metricFile = os.path.join(dataRoot, "detectors.yaml") 

1001 refs = [] 

1002 for detector in (1, 2): 

1003 detector_name = f"detector_{detector}" 

1004 dataId = butler.registry.expandDataId( 

1005 {"instrument": "DummyCamComp", "visit": 424, "detector": detector} 

1006 ) 

1007 # Create a DatasetRef for ingest 

1008 refs.append(DatasetRef(datasetType, dataId, run=self.default_run)) 

1009 

1010 # Test "move" transfer to ensure that the files themselves 

1011 # have disappeared following ingest. 

1012 with ResourcePath.temporary_uri(suffix=".yaml") as tempFile: 

1013 tempFile.transfer_from(ResourcePath(metricFile), transfer="copy") 

1014 

1015 datasets = [] 

1016 datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter)) 

1017 

1018 # For first ingest use copy. 

1019 butler.ingest(*datasets, transfer="copy", record_validation_info=False) 

1020 

1021 # Now try to ingest again in "execution butler" mode where 

1022 # the registry entries exist but the datastore does not have 

1023 # the files. We also need to strip the dimension records to ensure 

1024 # that they will be re-added by the ingest. 

1025 ref = datasets[0].refs[0] 

1026 datasets[0].refs = [ 

1027 cast( 

1028 DatasetRef, 

1029 butler.find_dataset(ref.datasetType, data_id=ref.dataId, collections=ref.run), 

1030 ) 

1031 for ref in datasets[0].refs 

1032 ] 

1033 all_refs = [] 

1034 for dataset in datasets: 

1035 refs = [] 

1036 for ref in dataset.refs: 

1037 # Create a dict from the dataId to drop the records. 

1038 new_data_id = dict(ref.dataId.required) 

1039 new_ref = butler.find_dataset(ref.datasetType, new_data_id, collections=ref.run) 

1040 assert new_ref is not None 

1041 self.assertFalse(new_ref.dataId.hasRecords()) 

1042 refs.append(new_ref) 

1043 dataset.refs = refs 

1044 all_refs.extend(dataset.refs) 

1045 butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False) 

1046 

1047 # Use move mode to test that the file is deleted. Also 

1048 # disable recording of file size. 

1049 butler.ingest(*datasets, transfer="move", record_validation_info=False) 

1050 

1051 # Check that every ref now has records. 

1052 for dataset in datasets: 

1053 for ref in dataset.refs: 

1054 self.assertTrue(ref.dataId.hasRecords()) 

1055 

1056 # Ensure that the file has disappeared. 

1057 self.assertFalse(tempFile.exists()) 

1058 

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler._datastore.getStoredItemsInfo(datasets[0].refs[0])  # type: ignore[attr-defined]
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertTrue(self.are_uris_equivalent(uri1, uri2), f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.exists(datasetTypeName, dataId1))
        self.assertTrue(butler.exists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets
        datasets = []
        butler.ingest(*datasets)

    def testPickle(self) -> None:
        """Test pickle support."""
        butler = self.create_empty_butler(run=self.default_run)
        assert isinstance(butler, DirectButler), "Expect DirectButler in configuration"
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self) -> None:
        butler = self.create_empty_butler(run=self.default_run)
        dimensions = butler.dimensions.conform(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("day_obs", [{"instrument": "DummyCam", "id": 20250101}]),
            (
                "visit",
                [
                    {
                        "instrument": "DummyCam",
                        "id": 42,
                        "name": "fortytwo",
                        "physical_filter": "d-r",
                        "day_obs": 20250101,
                    }
                ],
            ),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry, entries are not created
        # for components, but querying can return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self) -> None:
        butler = self.create_empty_butler(run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("day_obs", {"instrument": "DummyCam", "id": 20250101}),
            (
                "visit",
                {
                    "instrument": "DummyCam",
                    "id": 42,
                    "name": "fortytwo",
                    "physical_filter": "d-r",
                    "day_obs": 20250101,
                },
            ),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get of a ref.
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get by dataset type name and dataId.
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.find_dataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)

    def testMakeRepo(self) -> None:
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler.from_config(butlerConfig)
        assert isinstance(butler1, DirectButler), "Expect DirectButler in configuration"
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler.from_config(butlerConfig)
        assert isinstance(butler2, DirectButler), "Expect DirectButler in configuration"
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler.from_config(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self) -> None:
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler._datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self) -> None:
        """Test that dataIds can be rewritten based on dimension records."""
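        # "Rewriting" here means that a put() keyed by seq_num and day_obs is
        # resolved through the exposure dimension records inserted below, so
        # the returned ref carries the implied exposure value.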

        butler = self.create_empty_butler(run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.dimensions.conform(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        butler.registry.insertDimensionData("day_obs", {"instrument": "DummyCamComp", "id": dayobs})

        for i in range(n_exposures):
            butler.registry.insertDimensionData("group", {"instrument": "DummyCamComp", "name": f"group{i}"})
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                    "group": f"group{i}",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)

        # Check that we can find the datasets using the day_obs or the
        # exposure.day_obs.
        datasets_1 = list(
            butler.registry.queryDatasets(
                datasetType,
                collections=self.default_run,
                where="day_obs = dayObs AND instrument = instr",
                bind={"dayObs": dayobs, "instr": "DummyCamComp"},
            )
        )
        datasets_2 = list(
            butler.registry.queryDatasets(
                datasetType,
                collections=self.default_run,
                where="exposure.day_obs = dayObs AND instrument = instr",
                bind={"dayObs": dayobs, "instr": "DummyCamComp"},
            )
        )
        self.assertEqual(datasets_1, datasets_2)

1379 def testGetDatasetCollectionCaching(self) -> None:

1380 # Prior to DM-41117, there was a bug where get_dataset would throw 

1381 # MissingCollectionError if you tried to fetch a dataset that was added 

1382 # after the collection cache was last updated. 
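# The reader butler is therefore created first, so its collection cache
# predates "new_run"; get_dataset must still resolve a ref written
# afterwards by the second butler.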

1383 reader_butler, datasetType = self.create_butler(self.default_run, "int", "datasettypename") 

1384 writer_butler = self.create_empty_butler(writeable=True, run="new_run") 

1385 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1386 put_ref = writer_butler.put(123, datasetType, dataId) 

1387 get_ref = reader_butler.get_dataset(put_ref.id) 

1388 self.assertEqual(get_ref.id, put_ref.id) 

1389 

1390 

1391class FileDatastoreButlerTests(ButlerTests): 

1392 """Common tests and specialization of ButlerTests for butlers backed 

1393 by datastores that inherit from FileDatastore. 

1394 """ 

1395 

1396 trustModeSupported = True 

1397 

1398 def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool: 

1399 """Check if file exists at a given path (relative to root). 

1400 

1401 Test testPutTemplates verifies actual physical existence of the files

1402 in the requested location. 

1403 """ 

1404 uri = ResourcePath(root, forceDirectory=True) 

1405 return uri.join(relpath).exists() 

1406 

1407 def testPutTemplates(self) -> None: 

1408 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1409 butler = self.create_empty_butler(run=self.default_run) 

1410 

1411 # Add needed Dimensions 

1412 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1413 butler.registry.insertDimensionData( 

1414 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1415 ) 

1416 butler.registry.insertDimensionData("day_obs", {"instrument": "DummyCamComp", "id": 20250101}) 

1417 butler.registry.insertDimensionData( 

1418 "visit", 

1419 { 

1420 "instrument": "DummyCamComp", 

1421 "id": 423, 

1422 "name": "v423", 

1423 "physical_filter": "d-r", 

1424 "day_obs": 20250101, 

1425 }, 

1426 ) 

1427 butler.registry.insertDimensionData( 

1428 "visit", 

1429 { 

1430 "instrument": "DummyCamComp", 

1431 "id": 425, 

1432 "name": "v425", 

1433 "physical_filter": "d-r", 

1434 "day_obs": 20250101, 

1435 }, 

1436 ) 

1437 

1438 # Create and store a dataset 

1439 metric = makeExampleMetrics() 

1440 

1441 # Create two almost-identical DatasetTypes (both will use default 

1442 # template) 

1443 dimensions = butler.dimensions.conform(["instrument", "visit"]) 

1444 butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass)) 

1445 butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass)) 

1446 butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass)) 

1447 

1448 dataId1 = {"instrument": "DummyCamComp", "visit": 423} 

1449 dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"} 

1450 

1451 # Put with exactly the data ID keys needed 

1452 ref = butler.put(metric, "metric1", dataId1) 

1453 uri = butler.getURI(ref) 

1454 self.assertTrue(uri.exists()) 

1455 self.assertTrue( 

1456 uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle") 

1457 ) 

1458 

1459 # Check the template based on dimensions 

1460 if hasattr(butler._datastore, "templates"): 

1461 butler._datastore.templates.validateTemplates([ref]) 

1462 

1463 # Put with extra data ID keys (physical_filter is an optional 

1464 # dependency); should not change template (at least the way we're 

1465 # defining them to behave now; the important thing is that they 

1466 # must be consistent). 

1467 ref = butler.put(metric, "metric2", dataId2) 

1468 uri = butler.getURI(ref) 

1469 self.assertTrue(uri.exists()) 

1470 self.assertTrue( 

1471 uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle") 

1472 ) 

1473 

1474 # Check the template based on dimensions 

1475 if hasattr(butler._datastore, "templates"): 

1476 butler._datastore.templates.validateTemplates([ref]) 

1477 

1478 # Use a template that has a typo in dimension record metadata. 

1479 # Easier to test with a butler that has a ref with records attached. 

1480 template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits") 

1481 with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"): 

1482 path = template.format(ref) 

1483 self.assertEqual(path, f"a/v423/{ref.id}_fits") 
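# The ":?" suffix marks {visit.namex} as optional, so the unknown record
# attribute is logged and dropped rather than raising; the adjacent "."
# separator is swallowed along with it, leaving "_fits" above.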

1484 

1485 template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits") 

1486 with self.assertRaises(KeyError): 

1487 with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"): 

1488 template.format(ref) 

1489 

1490 # Now use a file template that will not result in unique filenames 

1491 with self.assertRaises(FileTemplateValidationError): 

1492 butler.put(metric, "metric3", dataId1) 

1493 

1494 def testImportExport(self) -> None: 

1495 # Run put/get tests just to create and populate a repo. 

1496 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1497 self.runImportExportTest(storageClass) 

1498 

1499 @unittest.expectedFailure 

1500 def testImportExportVirtualComposite(self) -> None: 

1501 # Run put/get tests just to create and populate a repo. 

1502 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

1503 self.runImportExportTest(storageClass) 

1504 

1505 def runImportExportTest(self, storageClass: StorageClass) -> None: 

1506 """Test exporting and importing. 

1507 

1508 This test does an export to a temp directory and an import back 

1509 into a new temp directory repo. It does not assume a posix datastore. 

1510 """ 

1511 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1512 

1513 # Test that we must have a file extension. 

1514 with self.assertRaises(ValueError): 

1515 with exportButler.export(filename="dump", directory=".") as export: 

1516 pass 

1517 

1518 # Test that unknown format is not allowed. 

1519 with self.assertRaises(ValueError): 

1520 with exportButler.export(filename="dump.fits", directory=".") as export: 

1521 pass 

1522 

1523 # Test that the repo actually has at least one dataset. 

1524 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1525 self.assertGreater(len(datasets), 0) 

1526 # Add a DimensionRecord that's unused by those datasets. 

1527 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")} 

1528 exportButler.registry.insertDimensionData("skymap", skymapRecord) 

1529 # Export and then import datasets. 

1530 with safeTestTempDir(TESTDIR) as exportDir: 

1531 exportFile = os.path.join(exportDir, "exports.yaml") 

1532 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export: 

1533 export.saveDatasets(datasets) 

1534 # Export the same datasets again. This should quietly do 

1535 # nothing because of internal deduplication, and it shouldn't 

1536 # complain about being asked to export the "htm7" elements even 

1537 # though there aren't any in these datasets or in the database. 

1538 export.saveDatasets(datasets, elements=["htm7"]) 

1539 # Save one of the data IDs again; this should be harmless 

1540 # because of internal deduplication. 

1541 export.saveDataIds([datasets[0].dataId]) 

1542 # Save some dimension records directly. 

1543 export.saveDimensionData("skymap", [skymapRecord]) 

1544 self.assertTrue(os.path.exists(exportFile)) 

1545 with safeTestTempDir(TESTDIR) as importDir: 

1546 # We always want this to be a local posix butler 

1547 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml"))) 

1548 # Calling script.butlerImport tests the implementation of the 

1549 # butler command line interface "import" subcommand. Functions 

1550 # in the script folder are generally considered protected and 

1551 # should not be used as a public API.

1552 with open(exportFile) as f: 

1553 script.butlerImport( 

1554 importDir, 

1555 export_file=f, 

1556 directory=exportDir, 

1557 transfer="auto", 

1558 skip_dimensions=None, 

1559 ) 

1560 importButler = Butler.from_config(importDir, run=self.default_run) 

1561 for ref in datasets: 

1562 with self.subTest(ref=ref): 

1563 # Test for existence by passing in the DatasetType and 

1564 # data ID separately, to avoid lookup by dataset_id. 

1565 self.assertTrue(importButler.exists(ref.datasetType, ref.dataId)) 

1566 self.assertEqual( 

1567 list(importButler.registry.queryDimensionRecords("skymap")), 

1568 [importButler.dimensions["skymap"].RecordClass(**skymapRecord)], 

1569 ) 

1570 

1571 def testRemoveRuns(self) -> None: 

1572 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1573 butler = self.create_empty_butler(writeable=True) 

1574 # Load registry data with dimensions to hang datasets off of. 

1575 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1576 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1577 # Add some RUN-type collections.

1578 run1 = "run1" 

1579 butler.registry.registerRun(run1) 

1580 run2 = "run2" 

1581 butler.registry.registerRun(run2) 

1582 # Put a dataset in each run.

1583 metric = makeExampleMetrics() 

1584 dimensions = butler.dimensions.conform(["instrument", "physical_filter"]) 

1585 datasetType = self.addDatasetType( 

1586 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1587 ) 

1588 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1589 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1590 uri1 = butler.getURI(ref1) 

1591 uri2 = butler.getURI(ref2) 

1592 

1593 with self.assertRaises(OrphanedRecordError): 

1594 butler.registry.removeDatasetType(datasetType.name) 

1595 

1596 # Remove from both runs with different values for unstore. 

1597 butler.removeRuns([run1], unstore=True) 

1598 butler.removeRuns([run2], unstore=False) 

1599 # Should be nothing in registry for either one, and datastore should 

1600 # not think either exists. 

1601 with self.assertRaises(MissingCollectionError): 

1602 butler.registry.getCollectionType(run1) 

1603 with self.assertRaises(MissingCollectionError): 

1604 butler.registry.getCollectionType(run2) 

1605 self.assertFalse(butler.stored(ref1)) 

1606 self.assertFalse(butler.stored(ref2)) 

1607 # The ref we unstored should be gone according to the URI, but the 

1608 # one we forgot should still be around. 

1609 self.assertFalse(uri1.exists()) 

1610 self.assertTrue(uri2.exists()) 

1611 

1612 # Now that the collections have been pruned we can remove the 

1613 # dataset type 

1614 butler.registry.removeDatasetType(datasetType.name) 

1615 

1616 with self.assertLogs("lsst.daf.butler.registry", "INFO") as cm: 

1617 butler.registry.removeDatasetType(("test*", "test*")) 

1618 self.assertIn("not defined", "\n".join(cm.output)) 

1619 

1620 def remove_dataset_out_of_band(self, butler: Butler, ref: DatasetRef) -> None: 

1621 """Simulate an external actor removing a file outside of Butler's 

1622 knowledge. 

1623 

1624 Subclasses may override to handle more complicated datastore 

1625 configurations. 

1626 """ 

1627 uri = butler.getURI(ref) 

1628 uri.remove() 

1629 datastore = cast(FileDatastore, butler._datastore) 

1630 datastore.cacheManager.remove_from_cache(ref) 

1631 

1632 def testPruneDatasets(self) -> None: 

1633 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1634 butler = self.create_empty_butler(writeable=True) 

1635 # Load registry data with dimensions to hang datasets off of. 

1636 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1637 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1638 # Add some RUN-type collections. 

1639 run1 = "run1" 

1640 butler.registry.registerRun(run1) 

1641 run2 = "run2" 

1642 butler.registry.registerRun(run2) 

1643 # Put some datasets. ref1 and ref2 have the same data ID, and are in

1644 # different runs. ref3 has a different data ID. 

1645 metric = makeExampleMetrics() 

1646 dimensions = butler.dimensions.conform(["instrument", "physical_filter"]) 

1647 datasetType = self.addDatasetType( 

1648 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1649 ) 

1650 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1651 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1652 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1653 

1654 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1655 for ref, stored in many_stored.items(): 

1656 self.assertTrue(stored, f"Ref {ref} should be stored") 

1657 

1658 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1659 for ref, exists in many_exists.items(): 

1660 self.assertTrue(exists, f"Checking ref {ref} exists.") 

1661 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored") 

1662 

1663 # Simple prune. 

1664 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1665 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1)) 

1666 

1667 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1668 for ref, stored in many_stored.items(): 

1669 self.assertFalse(stored, f"Ref {ref} should not be stored") 

1670 

1671 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1672 for ref, exists in many_exists.items(): 

1673 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored") 

1674 

1675 # Put data back. 

1676 ref1_new = butler.put(metric, ref1) 

1677 self.assertEqual(ref1_new, ref1) # Reuses original ID. 

1678 ref2 = butler.put(metric, ref2) 

1679 

1680 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1681 self.assertTrue(many_stored[ref1]) 

1682 self.assertTrue(many_stored[ref2]) 

1683 self.assertFalse(many_stored[ref3]) 

1684 

1685 ref3 = butler.put(metric, ref3) 

1686 

1687 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1688 for ref, exists in many_exists.items(): 

1689 self.assertTrue(exists, f"Ref {ref} should be stored")

1690 

1691 # Clear out the datasets from registry and start again. 

1692 refs = [ref1, ref2, ref3] 

1693 butler.pruneDatasets(refs, purge=True, unstore=True) 

1694 for ref in refs: 

1695 butler.put(metric, ref) 

1696 

1697 # Confirm we can retrieve deferred. 

1698 dref1 = butler.getDeferred(ref1) # known and exists 

1699 metric1 = dref1.get() 

1700 self.assertEqual(metric1, metric) 

1701 

1702 # Test different forms of file availability. 

1703 # Need to be in a state where: 

1704 # - one ref just has registry record. 

1705 # - one ref has a missing file but a datastore record. 

1706 # - one ref has a missing datastore record but file is there. 

1707 # - one ref does not exist anywhere. 

1708 # Do not need to test a ref that has everything since that is tested 

1709 # above. Expected flag combinations are sketched below.
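# Rough map of the expected flags for the full_check=True query below,
# assuming DatasetExistence composes as a Flag enum (RECORDED = registry
# record, DATASTORE = datastore record, _ARTIFACT = file verified,
# _ASSUMED = file not checked):
#   ref0 -> UNRECOGNIZED
#   ref1 -> RECORDED (unstored, registry record retained)
#   ref2 -> RECORDED | DATASTORE (file removed out of band)
#   ref3 -> RECORDED (datastore record forgotten, file still on disk)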

1710 ref0 = DatasetRef( 

1711 datasetType, 

1712 DataCoordinate.standardize( 

1713 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions 

1714 ), 

1715 run=run1, 

1716 ) 

1717 

1718 # Delete from datastore and retain in Registry. 

1719 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False) 

1720 

1721 # File has been removed. 

1722 self.remove_dataset_out_of_band(butler, ref2) 

1723 

1724 # Datastore has lost track. 

1725 butler._datastore.forget([ref3]) 

1726 

1727 # First test with a standard butler. 

1728 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1729 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1730 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1731 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1732 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED) 

1733 

1734 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False) 

1735 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1736 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1737 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN) 

1738 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1739 self.assertTrue(exists_many[ref2]) 

1740 

1741 # Check that per-ref query gives the same answer as many query. 

1742 for ref, exists in exists_many.items(): 

1743 self.assertEqual(butler.exists(ref, full_check=False), exists) 

1744 

1745 # getDeferred checks for existence before allowing the dataset to be

1746 # retrieved.

1747 with self.assertRaises(LookupError): 

1748 butler.getDeferred(ref3) # not known, file exists 

1749 dref2 = butler.getDeferred(ref2) # known but file missing 

1750 with self.assertRaises(FileNotFoundError): 

1751 dref2.get() 

1752 

1753 # Test again with a trusting butler. 

1754 if self.trustModeSupported: 

1755 butler._datastore.trustGetRequest = True 

1756 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1757 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1758 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1759 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1760 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT) 

1761 

1762 # When trusting we can get a deferred dataset handle that is not 

1763 # known but does exist. 

1764 dref3 = butler.getDeferred(ref3) 

1765 metric3 = dref3.get() 

1766 self.assertEqual(metric3, metric) 

1767 

1768 # Check that per-ref query gives the same answer as many query. 

1769 for ref, exists in exists_many.items(): 

1770 self.assertEqual(butler.exists(ref, full_check=True), exists) 

1771 

1772 # Create a ref that surprisingly has the UUID of an existing ref 

1773 # but is not the same. 

1774 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id) 

1775 with self.assertRaises(ValueError): 

1776 butler.exists(ref_bad) 

1777 

1778 # Create a ref that has a compatible storage class. 

1779 ref_compat = ref2.overrideStorageClass("StructuredDataDict") 

1780 exists = butler.exists(ref_compat) 

1781 self.assertEqual(exists, exists_many[ref2]) 

1782 

1783 # Remove everything and start from scratch. 

1784 butler._datastore.trustGetRequest = False 

1785 butler.pruneDatasets(refs, purge=True, unstore=True) 

1786 for ref in refs: 

1787 butler.put(metric, ref) 

1788 

1789 # These tests mess directly with the trash table and can leave the 

1790 # datastore in an odd state. Do them at the end. 

1791 # Check that in normal mode, deleting the record will lead to 

1792 # trash not touching the file. 

1793 uri1 = butler.getURI(ref1) 

1794 butler._datastore.bridge.moveToTrash( 

1795 [ref1], transaction=None 

1796 ) # Update the dataset_location table 

1797 butler._datastore.forget([ref1]) 

1798 butler._datastore.trash(ref1) 

1799 butler._datastore.emptyTrash() 

1800 self.assertTrue(uri1.exists()) 

1801 uri1.remove() # Clean it up. 

1802 

1803 # Simulate execution butler setup by deleting the datastore 

1804 # record but keeping the file around and trusting. 

1805 butler._datastore.trustGetRequest = True 

1806 uris = butler.get_many_uris([ref2, ref3]) 

1807 uri2 = uris[ref2].primaryURI 

1808 uri3 = uris[ref3].primaryURI 

1809 self.assertTrue(uri2.exists()) 

1810 self.assertTrue(uri3.exists()) 

1811 

1812 # Remove the datastore record. 

1813 butler._datastore.bridge.moveToTrash( 

1814 [ref2], transaction=None 

1815 ) # Update the dataset_location table 

1816 butler._datastore.forget([ref2]) 

1817 self.assertTrue(uri2.exists()) 

1818 butler._datastore.trash([ref2, ref3]) 

1819 # Immediate removal of the ref2 file.

1820 self.assertFalse(uri2.exists()) 

1821 # But ref3 has to wait for the trash to be emptied.

1822 self.assertTrue(uri3.exists()) 

1823 butler._datastore.emptyTrash() 

1824 self.assertFalse(uri3.exists()) 

1825 

1826 # Clear out the datasets from registry. 

1827 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1828 

1829 

1830class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1831 """PosixDatastore specialization of a butler""" 

1832 

1833 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1834 fullConfigKey: str | None = ".datastore.formatters" 

1835 validationCanFail = True 

1836 datastoreStr = ["/tmp"] 

1837 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1838 registryStr = "/gen3.sqlite3" 

1839 

1840 def testPathConstructor(self) -> None: 

1841 """Independent test of constructor using PathLike.""" 

1842 butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) 

1843 self.assertIsInstance(butler, Butler) 

1844 

1845 # And again with a Path object with the butler yaml 

1846 path = pathlib.Path(self.tmpConfigFile) 

1847 butler = Butler.from_config(path, writeable=False) 

1848 self.assertIsInstance(butler, Butler) 

1849 

1850 # And again with a Path object without the butler yaml 

1851 # (making sure we skip it if the tmp config doesn't end 

1852 # in butler.yaml -- which is the case for a subclass) 

1853 if self.tmpConfigFile.endswith("butler.yaml"): 

1854 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1855 butler = Butler.from_config(path, writeable=False) 

1856 self.assertIsInstance(butler, Butler) 

1857 

1858 def testExportTransferCopy(self) -> None: 

1859 """Test local export using all transfer modes""" 

1860 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1861 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1862 # Test that the repo actually has at least one dataset. 

1863 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1864 self.assertGreater(len(datasets), 0) 

1865 uris = [exportButler.getURI(d) for d in datasets] 

1866 assert isinstance(exportButler._datastore, FileDatastore) 

1867 datastoreRoot = exportButler.get_datastore_roots()[exportButler.get_datastore_names()[0]] 

1868 

1869 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1870 

1871 for path in pathsInStore: 

1872 # Assume local file system 

1873 assert path is not None 

1874 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1875 

1876 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1877 with safeTestTempDir(TESTDIR) as exportDir: 

1878 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1879 export.saveDatasets(datasets) 

1880 for path in pathsInStore: 

1881 assert path is not None 

1882 self.assertTrue( 

1883 self.checkFileExists(exportDir, path), 

1884 f"Check that mode {transfer} exported files", 

1885 ) 

1886 

1887 def testPytypeCoercion(self) -> None: 

1888 """Test python type coercion on Butler.get and put.""" 

1889 # Store some data with the normal example storage class. 

1890 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1891 datasetTypeName = "test_metric" 

1892 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1893 

1894 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1895 metric = butler.get(datasetTypeName, dataId=dataId) 

1896 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1897 

1898 datasetType_ori = butler.get_dataset_type(datasetTypeName) 

1899 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1900 

1901 # Now need to hack the registry dataset type definition. 

1902 # There is no API for this. 
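# Database.update(table, where, *rows) treats each "where" entry as a
# mapping from a column name to the row-dict key that supplies its value,
# which is why the row dicts below carry datasetTypeName itself as a key.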

1903 assert isinstance(butler._registry, SqlRegistry) 

1904 manager = butler._registry._managers.datasets 

1905 assert hasattr(manager, "_db") and hasattr(manager, "_static") 

1906 manager._db.update( 

1907 manager._static.dataset_type, 

1908 {"name": datasetTypeName}, 

1909 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1910 ) 

1911 

1912 # Force reset of dataset type cache 

1913 butler.registry.refresh() 

1914 

1915 datasetType_new = butler.get_dataset_type(datasetTypeName) 

1916 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1917 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1918 

1919 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1920 self.assertNotEqual(type(metric_model), type(metric)) 

1921 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1922 

1923 # Put the model and read it back to show that everything now 

1924 # works as normal. 

1925 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1926 metric_model_new = butler.get(metric_ref) 

1927 self.assertEqual(metric_model_new, metric_model) 

1928 

1929 # Hack the storage class again to something that will fail on the

1930 # get because no type conversion is possible.

1931 manager._db.update( 

1932 manager._static.dataset_type, 

1933 {"name": datasetTypeName}, 

1934 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1935 ) 

1936 butler.registry.refresh() 

1937 

1938 with self.assertRaises(ValueError): 

1939 butler.get(datasetTypeName, dataId=dataId) 

1940 

1941 

1942@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1943class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1944 """PosixDatastore specialization of a butler using Postgres""" 

1945 

1946 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1947 fullConfigKey = ".datastore.formatters" 

1948 validationCanFail = True 

1949 datastoreStr = ["/tmp"] 

1950 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1951 registryStr = "PostgreSQL@test" 

1952 postgresql: Any 

1953 

1954 @staticmethod 

1955 def _handler(postgresql: Any) -> None: 

1956 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1957 with engine.begin() as connection: 

1958 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1959 

1960 @classmethod 

1961 def setUpClass(cls) -> None: 

1962 # Create the postgres test server. 

1963 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1964 cache_initialized_db=True, on_initialized=cls._handler 

1965 ) 

1966 super().setUpClass() 

1967 

1968 @classmethod 

1969 def tearDownClass(cls) -> None: 

1970 # Clean up any lingering SQLAlchemy engines/connections 

1971 # so they're closed before we shut down the server. 

1972 gc.collect() 

1973 cls.postgresql.clear_cache() 

1974 super().tearDownClass() 

1975 

1976 def setUp(self) -> None: 

1977 self.server = self.postgresql() 

1978 

1979 # Need to add a registry section to the config. 

1980 self._temp_config = False 

1981 config = Config(self.configFile) 

1982 config["registry", "db"] = self.server.url() 

1983 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1984 config.dump(fh) 

1985 self.configFile = fh.name 

1986 self._temp_config = True 

1987 super().setUp() 

1988 

1989 def tearDown(self) -> None: 

1990 self.server.stop() 

1991 if self._temp_config and os.path.exists(self.configFile): 

1992 os.remove(self.configFile) 

1993 super().tearDown() 

1994 

1995 def testMakeRepo(self) -> None: 

1996 # The base class test assumes that it's using sqlite and assumes 

1997 # the config file is acceptable to sqlite. 

1998 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1999 

2000 

2001@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

2002class ClonedPostgresPosixDatastoreButlerTestCase(PostgresPosixDatastoreButlerTestCase, unittest.TestCase): 

2003 """Test that Butler with a Postgres registry still works after cloning.""" 

2004 

2005 def create_butler( 

2006 self, run: str, storageClass: StorageClass | str, datasetTypeName: str 

2007 ) -> tuple[DirectButler, DatasetType]: 

2008 butler, datasetType = super().create_butler(run, storageClass, datasetTypeName) 

2009 return butler._clone(run=run), datasetType 

2010 

2011 

2012class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

2013 """InMemoryDatastore specialization of a butler""" 

2014 

2015 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

2016 fullConfigKey = None 

2017 useTempRoot = False 

2018 validationCanFail = False 

2019 datastoreStr = ["datastore='InMemory"] 

2020 datastoreName = ["InMemoryDatastore@"] 

2021 registryStr = "/gen3.sqlite3" 

2022 

2023 def testIngest(self) -> None: 

2024 pass 

2025 

2026 

2027class ClonedSqliteButlerTestCase(InMemoryDatastoreButlerTestCase, unittest.TestCase): 

2028 """Test that a Butler with a Sqlite registry still works after cloning.""" 

2029 

2030 def create_butler( 

2031 self, run: str, storageClass: StorageClass | str, datasetTypeName: str 

2032 ) -> tuple[DirectButler, DatasetType]: 

2033 butler, datasetType = super().create_butler(run, storageClass, datasetTypeName) 

2034 return butler._clone(run=run), datasetType 

2035 

2036 

2037class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

2038 """PosixDatastore specialization""" 

2039 

2040 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2041 fullConfigKey = ".datastore.datastores.1.formatters" 

2042 validationCanFail = True 

2043 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

2044 datastoreName = [ 

2045 "InMemoryDatastore@", 

2046 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

2047 "SecondDatastore", 

2048 ] 

2049 registryStr = "/gen3.sqlite3" 

2050 

2051 def testPruneDatasets(self) -> None: 

2052 # This test relies on manipulating files out-of-band, which is 

2053 # impossible for this configuration because of the InMemoryDatastore in 

2054 # the ChainedDatastore. 

2055 pass 

2056 

2057 

2058class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

2059 """Test that a yaml file in one location can refer to a root in another.""" 

2060 

2061 datastoreStr = ["dir1"] 

2062 # Disable the makeRepo test since we are deliberately not using 

2063 # butler.yaml as the config name. 

2064 fullConfigKey = None 

2065 

2066 def setUp(self) -> None: 

2067 self.root = makeTestTempDir(TESTDIR) 

2068 

2069 # Make a new repository in one place 

2070 self.dir1 = os.path.join(self.root, "dir1") 

2071 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

2072 

2073 # Move the yaml file to a different place and add a "root" 

2074 self.dir2 = os.path.join(self.root, "dir2") 

2075 os.makedirs(self.dir2, exist_ok=True) 

2076 configFile1 = os.path.join(self.dir1, "butler.yaml") 

2077 config = Config(configFile1) 

2078 config["root"] = self.dir1 

2079 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

2080 config.dumpToUri(configFile2) 

2081 os.remove(configFile1) 

2082 self.tmpConfigFile = configFile2 

2083 

2084 def testFileLocations(self) -> None: 

2085 self.assertNotEqual(self.dir1, self.dir2) 

2086 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

2087 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

2088 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

2089 

2090 

2091class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

2092 """Test that a config file created by makeRepo outside of repo works.""" 

2093 

2094 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2095 

2096 def setUp(self) -> None: 

2097 self.root = makeTestTempDir(TESTDIR) 

2098 self.root2 = makeTestTempDir(TESTDIR) 

2099 

2100 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

2101 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

2102 

2103 def tearDown(self) -> None: 

2104 if os.path.exists(self.root2): 

2105 shutil.rmtree(self.root2, ignore_errors=True) 

2106 super().tearDown() 

2107 

2108 def testConfigExistence(self) -> None: 

2109 c = Config(self.tmpConfigFile) 

2110 uri_config = ResourcePath(c["root"]) 

2111 uri_expected = ResourcePath(self.root, forceDirectory=True) 

2112 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

2113 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

2114 

2115 def testPutGet(self) -> None: 

2116 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

2117 self.runPutGetTest(storageClass, "test_metric") 

2118 

2119 

2120class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

2121 """Test that a config file created by makeRepo outside of repo works.""" 

2122 

2123 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2124 

2125 def setUp(self) -> None: 

2126 self.root = makeTestTempDir(TESTDIR) 

2127 self.root2 = makeTestTempDir(TESTDIR) 

2128 

2129 self.tmpConfigFile = self.root2 

2130 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

2131 

2132 def testConfigExistence(self) -> None: 

2133 # Append the yaml file, otherwise the Config constructor does not know the

2134 # file type.

2135 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

2136 super().testConfigExistence() 

2137 

2138 

2139class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

2140 """Test that a config file created by makeRepo outside of repo works.""" 

2141 

2142 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2143 

2144 def setUp(self) -> None: 

2145 self.root = makeTestTempDir(TESTDIR) 

2146 self.root2 = makeTestTempDir(TESTDIR) 

2147 

2148 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

2149 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

2150 

2151 

2152@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

2153class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

2154 """S3Datastore specialization of a butler; an S3 storage Datastore + 

2155 a local in-memory SqlRegistry. 

2156 """ 

2157 

2158 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

2159 fullConfigKey = None 

2160 validationCanFail = True 

2161 

2162 bucketName = "anybucketname" 

2163 """Name of the Bucket that will be used in the tests. The name is read from 

2164 the config file used with the tests during set-up. 

2165 """ 

2166 

2167 root = "butlerRoot/" 

2168 """Root repository directory expected to be used in case useTempRoot=False. 

2169 Otherwise the root is set to a randomly generated 20-character string

2170 during set-up. 

2171 """ 

2172 

2173 datastoreStr = [f"datastore={root}"] 

2174 """Contains all expected root locations in a format expected to be 

2175 returned by Butler stringification. 

2176 """ 

2177 

2178 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

2179 """The expected format of the S3 Datastore string.""" 

2180 

2181 registryStr = "/gen3.sqlite3" 

2182 """Expected format of the Registry string.""" 

2183 

2184 mock_aws = mock_aws() 

2185 """The mocked s3 interface from moto.""" 

2186 

2187 def genRoot(self) -> str: 

2188 """Return a random string of len 20 to serve as a root 

2189 name for the temporary bucket repo. 

2190 

2191 This is equivalent to tempfile.mkdtemp as this is what self.root 

2192 becomes when useTempRoot is True. 

2193 """ 

2194 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

2195 return rndstr + "/" 

2196 

2197 def setUp(self) -> None: 

2198 config = Config(self.configFile) 

2199 uri = ResourcePath(config[".datastore.datastore.root"]) 

2200 self.bucketName = uri.netloc 

2201 

2202 # Enable S3 mocking of tests. 

2203 self.enterContext(clean_test_environment_for_s3()) 

2204 self.mock_aws.start() 

2205 

2206 if self.useTempRoot: 

2207 self.root = self.genRoot() 

2208 rooturi = f"s3://{self.bucketName}/{self.root}" 

2209 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

2210 

2211 # Need a local folder to store the registry database.

2212 self.reg_dir = makeTestTempDir(TESTDIR) 

2213 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

2214 

2215 # Moto needs to know that we expect the bucket self.bucketName to exist

2216 # (this used to be the class attribute bucketName) 

2217 s3 = boto3.resource("s3") 

2218 s3.create_bucket(Bucket=self.bucketName) 

2219 

2220 self.datastoreStr = [f"datastore='{rooturi}'"] 

2221 self.datastoreName = [f"FileDatastore@{rooturi}"] 

2222 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

2223 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

2224 

2225 def tearDown(self) -> None: 

2226 s3 = boto3.resource("s3") 

2227 bucket = s3.Bucket(self.bucketName) 

2228 try: 

2229 bucket.objects.all().delete() 

2230 except botocore.exceptions.ClientError as e: 

2231 if e.response["Error"]["Code"] == "404": 

2232 # The key was not reachable; nothing to delete.

2233 pass 

2234 else: 

2235 raise 

2236 

2237 bucket = s3.Bucket(self.bucketName) 

2238 bucket.delete() 

2239 

2240 # Stop the S3 mock. 

2241 self.mock_aws.stop() 

2242 

2243 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

2244 shutil.rmtree(self.reg_dir, ignore_errors=True) 

2245 

2246 if self.useTempRoot and os.path.exists(self.root): 

2247 shutil.rmtree(self.root, ignore_errors=True) 

2248 

2249 super().tearDown() 

2250 

2251 

2252class PosixDatastoreTransfers(unittest.TestCase): 

2253 """Test data transfers between butlers. 

2254 

2255 Test for different managers. UUID to UUID and integer to integer are 

2256 tested. UUID to integer is not supported since we do not currently 

2257 want to allow that. Integer to UUID is supported with the caveat 

2258 that UUID4 will be generated and this will be incorrect for raw 

2259 dataset types. The test ignores that. 

2260 """ 

2261 

2262 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2263 storageClassFactory: StorageClassFactory 

2264 

2265 @classmethod 

2266 def setUpClass(cls) -> None: 

2267 cls.storageClassFactory = StorageClassFactory() 

2268 cls.storageClassFactory.addFromConfig(cls.configFile) 

2269 

2270 def setUp(self) -> None: 

2271 self.root = makeTestTempDir(TESTDIR) 

2272 self.config = Config(self.configFile) 

2273 

2274 def tearDown(self) -> None: 

2275 removeTestTempDir(self.root) 

2276 

2277 def create_butler(self, manager: str, label: str) -> Butler: 

2278 config = Config(self.configFile) 

2279 config["registry", "managers", "datasets"] = manager 

2280 return Butler.from_config( 

2281 Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True 

2282 ) 

2283 

2284 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None: 

2285 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID" 

2286 if manager1 is None: 

2287 manager1 = default 

2288 if manager2 is None: 

2289 manager2 = default 

2290 self.source_butler = self.create_butler(manager1, "1") 

2291 self.target_butler = self.create_butler(manager2, "2") 

2292 

2293 def testTransferUuidToUuid(self) -> None: 

2294 self.create_butlers() 

2295 self.assertButlerTransfers() 

2296 

2297 def testTransferMissing(self) -> None: 

2298 """Test transfers where datastore records are missing. 

2299 

2300 This is how execution butler works. 

2301 """ 

2302 self.create_butlers() 

2303 

2304 # Configure the source butler to allow trust. 

2305 self.source_butler._datastore._set_trust_mode(True) 

2306 

2307 self.assertButlerTransfers(purge=True) 

2308 

2309 def testTransferMissingDisassembly(self) -> None: 

2310 """Test transfers where datastore records are missing. 

2311 

2312 This is how execution butler works. 

2313 """ 

2314 self.create_butlers() 

2315 

2316 # Configure the source butler to allow trust. 

2317 self.source_butler._datastore._set_trust_mode(True) 

2318 

2319 # Test disassembly. 

2320 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2321 

2322 def testAbsoluteURITransferDirect(self) -> None: 

2323 """Test transfer using an absolute URI.""" 

2324 self._absolute_transfer("auto") 

2325 

2326 def testAbsoluteURITransferCopy(self) -> None: 

2327 """Test transfer using an absolute URI.""" 

2328 self._absolute_transfer("copy") 

2329 

2330 def _absolute_transfer(self, transfer: str) -> None: 

2331 self.create_butlers() 

2332 

2333 storageClassName = "StructuredData" 

2334 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2335 datasetTypeName = "random_data" 

2336 run = "run1" 

2337 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2338 

2339 dimensions = self.source_butler.dimensions.conform(()) 

2340 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2341 self.source_butler.registry.registerDatasetType(datasetType) 

2342 

2343 metrics = makeExampleMetrics() 

2344 with ResourcePath.temporary_uri(suffix=".json") as temp: 

2345 dataId = DataCoordinate.make_empty(self.source_butler.dimensions) 

2346 source_refs = [DatasetRef(datasetType, dataId, run=run)] 

2347 temp.write(json.dumps(metrics.exportAsDict()).encode()) 

2348 dataset = FileDataset(path=temp, refs=source_refs) 

2349 self.source_butler.ingest(dataset, transfer="direct") 

2350 

2351 self.target_butler.transfer_from( 

2352 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer 

2353 ) 

2354 

2355 uri = self.target_butler.getURI(dataset.refs[0]) 

2356 if transfer == "auto": 

2357 self.assertEqual(uri, temp) 

2358 else: 

2359 self.assertNotEqual(uri, temp) 

2360 

2361 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None: 

2362 """Test that a run can be transferred to another butler.""" 

2363 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2364 datasetTypeName = "random_data" 

2365 

2366 # Test will create 3 collections and we will want to transfer 

2367 # two of those three. 

2368 runs = ["run1", "run2", "other"] 

2369 

2370 # Also want to use two different dataset types to ensure that 

2371 # grouping works. 

2372 datasetTypeNames = ["random_data", "random_data_2"] 

2373 

2374 # Create the run collections in the source butler. 

2375 for run in runs: 

2376 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2377 

2378 # Create dimensions in source butler. 

2379 n_exposures = 30 

2380 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2381 self.source_butler.registry.insertDimensionData( 

2382 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2383 ) 

2384 self.source_butler.registry.insertDimensionData( 

2385 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2386 ) 

2387 self.source_butler.registry.insertDimensionData( 

2388 "day_obs", 

2389 { 

2390 "instrument": "DummyCamComp", 

2391 "id": 20250101, 

2392 }, 

2393 ) 

2394 

2395 for i in range(n_exposures): 

2396 self.source_butler.registry.insertDimensionData( 

2397 "group", {"instrument": "DummyCamComp", "name": f"group{i}"} 

2398 ) 

2399 self.source_butler.registry.insertDimensionData( 

2400 "exposure", 

2401 { 

2402 "instrument": "DummyCamComp", 

2403 "id": i, 

2404 "obs_id": f"exp{i}", 

2405 "physical_filter": "d-r", 

2406 "group": f"group{i}", 

2407 "day_obs": 20250101, 

2408 }, 

2409 ) 

2410 

2411 # Create dataset types in the source butler. 

2412 dimensions = self.source_butler.dimensions.conform(["instrument", "exposure"]) 

2413 for datasetTypeName in datasetTypeNames: 

2414 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2415 self.source_butler.registry.registerDatasetType(datasetType) 

2416 

2417 # Write a dataset to an unrelated run -- this will ensure that 

2418 # we are rewriting integer dataset ids in the target if necessary. 

2419 # Will not be relevant for UUID. 

2420 run = "distraction" 

2421 butler = Butler.from_config(butler=self.source_butler, run=run) 

2422 butler.put( 

2423 makeExampleMetrics(), 

2424 datasetTypeName, 

2425 exposure=1, 

2426 instrument="DummyCamComp", 

2427 physical_filter="d-r", 

2428 ) 

2429 

2430 # Write some example metrics to the source 

2431 butler = Butler.from_config(butler=self.source_butler) 

2432 

2433 # Set of DatasetRefs that should be in the list of refs to transfer 

2434 # but which will not be transferred. 

2435 deleted: set[DatasetRef] = set() 

2436 

2437 n_expected = 20 # Number of datasets expected to be transferred 

2438 source_refs = [] 

2439 for i in range(n_exposures): 

2440 # Put a third of the datasets into each collection; only retain

2441 # two thirds.

2442 index = i % 3 

2443 run = runs[index] 

2444 datasetTypeName = datasetTypeNames[i % 2] 

2445 

2446 metric = MetricsExample( 

2447 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)] 

2448 ) 

2449 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2450 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2451 

2452 # Remove the datastore record using low-level API, but only 

2453 # for a specific index. 

2454 if purge and index == 1: 

2455 # For one of these delete the file as well. 

2456 # This allows the "missing" code to filter the 

2457 # file out. 

2458 # Access the individual datastores. 

2459 datastores = [] 

2460 if hasattr(butler._datastore, "datastores"): 

2461 datastores.extend(butler._datastore.datastores) 

2462 else: 

2463 datastores.append(butler._datastore) 

2464 

2465 if not deleted: 

2466 # For a chained datastore we need to remove 

2467 # files from each datastore in the chain.

2468 for datastore in datastores: 

2469 # The file might not be known to the datastore 

2470 # if constraints are used. 

2471 try: 

2472 primary, uris = datastore.getURIs(ref) 

2473 except FileNotFoundError: 

2474 continue 

2475 if primary and primary.scheme != "mem": 

2476 primary.remove() 

2477 for uri in uris.values(): 

2478 if uri.scheme != "mem": 

2479 uri.remove() 

2480 n_expected -= 1 

2481 deleted.add(ref) 

2482 

2483 # Remove the datastore record. 

2484 for datastore in datastores: 

2485 if hasattr(datastore, "removeStoredItemInfo"): 

2486 datastore.removeStoredItemInfo(ref) 

2487 

2488 if index < 2: 

2489 source_refs.append(ref) 

2490 if ref not in deleted: 

2491 new_metric = butler.get(ref) 

2492 self.assertEqual(new_metric, metric) 

2493 

2494 # Create some bad dataset types to ensure we check for inconsistent 

2495 # definitions. 

2496 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2497 for datasetTypeName in datasetTypeNames: 

2498 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2499 self.target_butler.registry.registerDatasetType(datasetType) 

2500 with self.assertRaises(ConflictingDefinitionError) as cm: 

2501 self.target_butler.transfer_from(self.source_butler, source_refs) 

2502 self.assertIn("dataset type differs", str(cm.exception)) 

2503 

2504 # And remove the bad definitions. 

2505 for datasetTypeName in datasetTypeNames: 

2506 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2507 

2508 # Transfer without creating dataset types should fail. 

2509 with self.assertRaises(KeyError): 

2510 self.target_butler.transfer_from(self.source_butler, source_refs) 

2511 

2512 # Transfer without creating dimensions should fail. 

2513 with self.assertRaises(ConflictingDefinitionError) as cm: 

2514 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True) 

2515 self.assertIn("dimension", str(cm.exception)) 

2516 

2517 # The failed transfer above leaves registry in an inconsistent 

2518 # state because the run is created but then rolled back without 

2519 # the collection cache being cleared. For now force a refresh. 

2520 # Can remove with DM-35498. 

2521 self.target_butler.registry.refresh() 

2522 

2523 # Do a dry run -- this should not have any effect on the target butler. 

2524 self.target_butler.transfer_from(self.source_butler, source_refs, dry_run=True) 

2525 

2526 # Transfer the records for one ref to test the alternative API. 

2527 with self.assertLogs(logger="lsst", level=logging.DEBUG) as log_cm: 

2528 self.target_butler.transfer_dimension_records_from(self.source_butler, [source_refs[0]]) 

2529 self.assertIn("number of records transferred: 1", ";".join(log_cm.output)) 

2530 

2531 # Now transfer them to the second butler, including dimensions. 

2532 with self.assertLogs(logger="lsst", level=logging.DEBUG) as log_cm: 

2533 transferred = self.target_butler.transfer_from( 

2534 self.source_butler, 

2535 source_refs, 

2536 register_dataset_types=True, 

2537 transfer_dimensions=True, 

2538 ) 

2539 self.assertEqual(len(transferred), n_expected) 

2540 log_output = ";".join(log_cm.output) 

2541 

2542 # A ChainedDatastore will use the in-memory datastore for mexists 

2543 # so we cannot rely on the mexists log message.

2544 self.assertIn("Number of datastore records found in source", log_output) 

2545 self.assertIn("Creating output run", log_output) 

2546 

2547 # Do the transfer twice to ensure that it will do nothing extra. 

2548 # Only do this if purge=True because it does not work for int 

2549 # dataset_id. 

2550 if purge: 

2551 # This should not need to register dataset types. 

2552 transferred = self.target_butler.transfer_from(self.source_butler, source_refs) 

2553 self.assertEqual(len(transferred), n_expected) 

2554 

2555 # Also do an explicit low-level transfer to trigger some 

2556 # edge cases. 

2557 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2558 self.target_butler._datastore.transfer_from(self.source_butler._datastore, source_refs) 

2559 log_output = ";".join(log_cm.output) 

2560 self.assertIn("no file artifacts exist", log_output) 

2561 

2562 with self.assertRaises((TypeError, AttributeError)): 

2563 self.target_butler._datastore.transfer_from(self.source_butler, source_refs) # type: ignore 

2564 

2565 with self.assertRaises(ValueError): 

2566 self.target_butler._datastore.transfer_from( 

2567 self.source_butler._datastore, source_refs, transfer="split" 

2568 ) 

2569 

2570 # Now try to get the same refs from the new butler. 

2571 for ref in source_refs: 

2572 if ref not in deleted: 

2573 new_metric = self.target_butler.get(ref) 

2574 old_metric = self.source_butler.get(ref) 

2575 self.assertEqual(new_metric, old_metric) 

2576 

2577 # Now prune the run2 collection and instead create a CHAINED collection.

2578 # This should block the transfer. 

2579 self.target_butler.removeRuns(["run2"], unstore=True) 

2580 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2581 with self.assertRaises(CollectionTypeError): 

2582 # Re-importing the run1 datasets can be problematic if they 

2583 # use integer IDs so filter those out. 

2584 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2585 self.target_butler.transfer_from(self.source_butler, to_transfer) 

2586 

2587 

2588class ChainedDatastoreTransfers(PosixDatastoreTransfers): 

2589 """Test transfers using a chained datastore.""" 

2590 

2591 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2592 

2593 

2594class NullDatastoreTestCase(unittest.TestCase): 

2595 """Test that we can fall back to a null datastore.""" 

2596 

2597 # Need a good config to create the repo. 

2598 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2599 storageClassFactory: StorageClassFactory 

2600 

2601 @classmethod 

2602 def setUpClass(cls) -> None: 

2603 cls.storageClassFactory = StorageClassFactory() 

2604 cls.storageClassFactory.addFromConfig(cls.configFile) 

2605 

2606 def setUp(self) -> None: 

2607 """Create a new butler root for each test.""" 

2608 self.root = makeTestTempDir(TESTDIR) 

2609 Butler.makeRepo(self.root, config=Config(self.configFile)) 

2610 

2611 def tearDown(self) -> None: 

2612 removeTestTempDir(self.root) 

2613 

2614 def test_fallback(self) -> None: 

2615 # Read the butler config and mess with the datastore section. 

2616 config_path = os.path.join(self.root, "butler.yaml") 

2617 bad_config = Config(config_path) 

2618 bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore" 

2619 bad_config.dumpToUri(config_path) 

2620 

2621 with self.assertRaises(RuntimeError): 

2622 Butler(self.root, without_datastore=False) 

2623 

2624 with self.assertRaises(RuntimeError): 

2625 Butler.from_config(self.root, without_datastore=False) 

2626 

2627 butler = Butler.from_config(self.root, writeable=True, without_datastore=True) 

2628 self.assertIsInstance(butler._datastore, NullDatastore) 

2629 

2630 # Check that registry is working. 

2631 butler.registry.registerRun("MYRUN") 

2632 collections = butler.registry.queryCollections(...) 

2633 self.assertIn("MYRUN", set(collections)) 

2634 

2635 # Create a ref. 

2636 dimensions = butler.dimensions.conform([]) 

2637 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

2638 datasetTypeName = "metric" 

2639 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2640 butler.registry.registerDatasetType(datasetType) 

2641 ref = DatasetRef(datasetType, {}, run="MYRUN") 

2642 

2643 # Check that datastore will complain. 

2644 with self.assertRaises(FileNotFoundError): 

2645 butler.get(ref) 

2646 with self.assertRaises(FileNotFoundError): 

2647 butler.getURI(ref) 

2648 

2649 

2650@unittest.skipIf(create_test_server is None, "Server dependencies not installed.") 

2651class ButlerServerTests(FileDatastoreButlerTests, unittest.TestCase): 

2652 """Test RemoteButler and Butler server.""" 

2653 

2654 configFile = None 

2655 predictionSupported = False 

2656 trustModeSupported = False 

2657 

2658 def setUp(self): 

2659 self.server_instance = self.enterContext(create_test_server(TESTDIR)) 

2660 

2661 def tearDown(self): 

2662 pass 

2663 

2664 def are_uris_equivalent(self, uri1: ResourcePath, uri2: ResourcePath) -> bool: 

2665 # S3 pre-signed URLs may end up with differing expiration times in the 

2666 # query parameters, so ignore query parameters when comparing. 

2667 return uri1.scheme == uri2.scheme and uri1.netloc == uri2.netloc and uri1.path == uri2.path 

2668 

2669 def create_empty_butler(self, run: str | None = None, writeable: bool | None = None) -> Butler: 

2670 return self.server_instance.hybrid_butler._clone(run=run) 

2671 

2672 def remove_dataset_out_of_band(self, butler: Butler, ref: DatasetRef) -> None: 

2673 # Can't delete a file via S3 signed URLs, so we need to reach in 

2674 # through DirectButler to delete the dataset. 

2675 uri = self.server_instance.direct_butler.getURI(ref) 

2676 uri.remove() 

2677 

2678 def testConstructor(self): 

2679 # RemoteButler constructor is tested in test_server.py and 

2680 # test_remote_butler.py. 

2681 pass 

2682 

2683 def testDafButlerRepositories(self): 

2684 # Loading of RemoteButler via repository index is tested in 

2685 # test_server.py. 

2686 pass 

2687 

2688 def testGetDatasetTypes(self) -> None: 

2689 # This is mostly a test of validateConfiguration, which is for 

2690 # validating Datastore configuration and thus isn't relevant to 

2691 # RemoteButler. 

2692 pass 

2693 

2694 def testMakeRepo(self) -> None: 

2695 # Only applies to DirectButler. 

2696 pass 

2697 

2698 # Pickling not yet implemented for RemoteButler/HybridButler. 

2699 @unittest.expectedFailure 

2700 def testPickle(self) -> None: 

2701 return super().testPickle() 

2702 

2703 def testStringification(self) -> None: 

2704 self.assertEqual( 

2705 str(self.server_instance.remote_butler), 

2706 "RemoteButler(https://test.example/api/butler/repo/testrepo)", 

2707 ) 

2708 

2709 def testTransaction(self) -> None: 

2710 # Transactions will never be supported for RemoteButler. 

2711 pass 

2712 

2713 def testPutTemplates(self) -> None: 

2714 # The Butler server instance is configured with different file naming 

2715 # templates than this test is expecting. 

2716 pass 

2717 

2718 

2719def setup_module(module: types.ModuleType) -> None: 

2720 """Set up the module for pytest.""" 

2721 clean_environment() 

2722 

2723 

2724if __name__ == "__main__": 

2725 clean_environment() 

2726 unittest.main()