Coverage for tests/test_butler.py: 14%

1486 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-15 02:03 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28"""Tests for Butler. 

29""" 

30from __future__ import annotations 

31 

32import gc 

33import json 

34import logging 

35import os 

36import pathlib 

37import pickle 

38import posixpath 

39import random 

40import shutil 

41import string 

42import tempfile 

43import unittest 

44import uuid 

45from collections.abc import Callable, Mapping 

46from typing import TYPE_CHECKING, Any, cast 

47 

48try: 

49 import boto3 

50 import botocore 

51 from lsst.resources.s3utils import clean_test_environment_for_s3 

52 

53 try: 

54 from moto import mock_aws # v5 

55 except ImportError: 

56 from moto import mock_s3 as mock_aws 

57except ImportError: 

58 boto3 = None 

59 

60 def mock_aws(*args: Any, **kwargs: Any) -> Any: # type: ignore[no-untyped-def] 

61 """No-op decorator in case moto mock_aws can not be imported.""" 

62 return None 

63 

64 

65try: 

66 from lsst.daf.butler.tests.server import create_test_server 

67except ImportError: 

68 create_test_server = None 

69 

70try: 

71 # It's possible but silly to have testing.postgresql installed without 

72 # having the postgresql server installed (because then nothing in 

73 # testing.postgresql would work), so we use the presence of that module 

74 # to test whether we can expect the server to be available. 

75 import testing.postgresql # type: ignore[import] 

76except ImportError: 

77 testing = None 

78 

79import astropy.time 

80import sqlalchemy 

81from lsst.daf.butler import ( 

82 Butler, 

83 ButlerConfig, 

84 ButlerRepoIndex, 

85 CollectionCycleError, 

86 CollectionType, 

87 Config, 

88 DataCoordinate, 

89 DatasetExistence, 

90 DatasetNotFoundError, 

91 DatasetRef, 

92 DatasetType, 

93 FileDataset, 

94 NoDefaultCollectionError, 

95 StorageClassFactory, 

96 ValidationError, 

97 script, 

98) 

99from lsst.daf.butler.datastore import NullDatastore 

100from lsst.daf.butler.datastore.file_templates import FileTemplate, FileTemplateValidationError 

101from lsst.daf.butler.datastores.fileDatastore import FileDatastore 

102from lsst.daf.butler.direct_butler import DirectButler 

103from lsst.daf.butler.registry import ( 

104 CollectionError, 

105 CollectionTypeError, 

106 ConflictingDefinitionError, 

107 DataIdValueError, 

108 MissingCollectionError, 

109 OrphanedRecordError, 

110) 

111from lsst.daf.butler.registry.sql_registry import SqlRegistry 

112from lsst.daf.butler.repo_relocation import BUTLER_ROOT_TAG 

113from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter 

114from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir 

115from lsst.resources import ResourcePath 

116from lsst.utils import doImportType 

117from lsst.utils.introspection import get_full_type_name 

118 

119if TYPE_CHECKING: 

120 import types 

121 

122 from lsst.daf.butler import DimensionGroup, Registry, StorageClass 

123 

124TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

125 

126 

127def clean_environment() -> None: 

128 """Remove external environment variables that affect the tests.""" 

129 for k in ("DAF_BUTLER_REPOSITORY_INDEX",): 

130 os.environ.pop(k, None) 

131 

132 

133def makeExampleMetrics() -> MetricsExample: 

134 """Return example dataset suitable for tests.""" 

135 return MetricsExample( 

136 {"AM1": 5.2, "AM2": 30.6}, 

137 {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}}, 

138 [563, 234, 456.7, 752, 8, 9, 27], 

139 ) 

140 

141 

142class TransactionTestError(Exception): 

143 """Specific error for testing transactions, to prevent misdiagnosing 

144 that might otherwise occur when a standard exception is used. 

145 """ 

146 

147 pass 

148 

149 

150class ButlerConfigTests(unittest.TestCase): 

151 """Simple tests for ButlerConfig that are not tested in any other test 

152 cases. 

153 """ 

154 

155 def testSearchPath(self) -> None: 

156 configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml") 

157 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm: 

158 config1 = ButlerConfig(configFile) 

159 self.assertNotIn("testConfigs", "\n".join(cm.output)) 

160 

161 overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs") 

162 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm: 

163 config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory]) 

164 self.assertIn("testConfigs", "\n".join(cm.output)) 

165 

166 key = ("datastore", "records", "table") 

167 self.assertNotEqual(config1[key], config2[key]) 

168 self.assertEqual(config2[key], "override_record") 

169 

170 

171class ButlerPutGetTests(TestCaseMixin): 

172 """Helper method for running a suite of put/get tests from different 

173 butler configurations. 

174 """ 

175 

176 root: str 

177 default_run = "ingésτ😺" 

178 storageClassFactory: StorageClassFactory 

179 configFile: str | None 

180 tmpConfigFile: str 

181 

182 @staticmethod 

183 def addDatasetType( 

184 datasetTypeName: str, dimensions: DimensionGroup, storageClass: StorageClass | str, registry: Registry 

185 ) -> DatasetType: 

186 """Create a DatasetType and register it""" 

187 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

188 registry.registerDatasetType(datasetType) 

189 return datasetType 

190 

191 @classmethod 

192 def setUpClass(cls) -> None: 

193 cls.storageClassFactory = StorageClassFactory() 

194 if cls.configFile is not None: 

195 cls.storageClassFactory.addFromConfig(cls.configFile) 

196 

197 def assertGetComponents( 

198 self, 

199 butler: Butler, 

200 datasetRef: DatasetRef, 

201 components: tuple[str, ...], 

202 reference: Any, 

203 collections: Any = None, 

204 ) -> None: 

205 datasetType = datasetRef.datasetType 

206 dataId = datasetRef.dataId 

207 deferred = butler.getDeferred(datasetRef) 

208 

209 for component in components: 

210 compTypeName = datasetType.componentTypeName(component) 

211 result = butler.get(compTypeName, dataId, collections=collections) 

212 self.assertEqual(result, getattr(reference, component)) 

213 result_deferred = deferred.get(component=component) 

214 self.assertEqual(result_deferred, result) 

215 

216 def tearDown(self) -> None: 

217 if self.root is not None: 

218 removeTestTempDir(self.root) 

219 

220 def create_empty_butler(self, run: str | None = None, writeable: bool | None = None): 

221 """Create a Butler for the test repository, without inserting test 

222 data. 

223 """ 

224 butler = Butler.from_config(self.tmpConfigFile, run=run, writeable=writeable) 

225 assert isinstance(butler, DirectButler), "Expect DirectButler in configuration" 

226 return butler 

227 

228 def create_butler( 

229 self, run: str, storageClass: StorageClass | str, datasetTypeName: str 

230 ) -> tuple[Butler, DatasetType]: 

231 """Create a Butler for the test repository and insert some test data 

232 into it. 

233 """ 

234 butler = self.create_empty_butler(run=run) 

235 

236 collections = set(butler.registry.queryCollections()) 

237 self.assertEqual(collections, {run}) 

238 # Create and register a DatasetType 

239 dimensions = butler.dimensions.conform(["instrument", "visit"]) 

240 

241 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

242 

243 # Add needed Dimensions 

244 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

245 butler.registry.insertDimensionData( 

246 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

247 ) 

248 butler.registry.insertDimensionData( 

249 "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"} 

250 ) 

251 butler.registry.insertDimensionData("day_obs", {"instrument": "DummyCamComp", "id": 20200101}) 

252 visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai") 

253 visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai") 

254 butler.registry.insertDimensionData( 

255 "visit", 

256 { 

257 "instrument": "DummyCamComp", 

258 "id": 423, 

259 "name": "fourtwentythree", 

260 "physical_filter": "d-r", 

261 "datetime_begin": visit_start, 

262 "datetime_end": visit_end, 

263 "day_obs": 20200101, 

264 }, 

265 ) 

266 

267 # Add more visits for some later tests 

268 for visit_id in (424, 425): 

269 butler.registry.insertDimensionData( 

270 "visit", 

271 { 

272 "instrument": "DummyCamComp", 

273 "id": visit_id, 

274 "name": f"fourtwentyfour_{visit_id}", 

275 "physical_filter": "d-r", 

276 "day_obs": 20200101, 

277 }, 

278 ) 

279 return butler, datasetType 

280 

281 def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Butler: 

282 # New datasets will be added to run and tag, but we will only look in 

283 # tag when looking up datasets. 

284 run = self.default_run 

285 butler, datasetType = self.create_butler(run, storageClass, datasetTypeName) 

286 assert butler.run is not None 

287 

288 # Create and store a dataset 

289 metric = makeExampleMetrics() 

290 dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423}) 

291 

292 # Dataset should not exist if we haven't added it 

293 with self.assertRaises(DatasetNotFoundError): 

294 butler.get(datasetTypeName, dataId) 

295 

296 # Put and remove the dataset once as a DatasetRef, once as a dataId, 

297 # and once with a DatasetType 

298 

299 # Keep track of any collections we add and do not clean up 

300 expected_collections = {run} 

301 

302 counter = 0 

303 ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1") 

304 args = tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate] 

305 for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)): 

306 # Since we are using subTest we can get cascading failures 

307 # here with the first attempt failing and the others failing 

308 # immediately because the dataset already exists. Work around 

309 # this by using a distinct run collection each time 

310 counter += 1 

311 this_run = f"put_run_{counter}" 

312 butler.registry.registerCollection(this_run, type=CollectionType.RUN) 

313 expected_collections.update({this_run}) 

314 

315 with self.subTest(args=args): 

316 kwargs: dict[str, Any] = {} 

317 if not isinstance(args[0], DatasetRef): # type: ignore 

318 kwargs["run"] = this_run 

319 ref = butler.put(metric, *args, **kwargs) 

320 self.assertIsInstance(ref, DatasetRef) 

321 

322 # Test get of a ref. 

323 metricOut = butler.get(ref) 

324 self.assertEqual(metric, metricOut) 

325 # Test get 

326 metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run) 

327 self.assertEqual(metric, metricOut) 

328 # Test get with a datasetRef 

329 metricOut = butler.get(ref) 

330 self.assertEqual(metric, metricOut) 

331 # Test getDeferred with dataId 

332 metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get() 

333 self.assertEqual(metric, metricOut) 

334 # Test getDeferred with a ref 

335 metricOut = butler.getDeferred(ref).get() 

336 self.assertEqual(metric, metricOut) 

337 

338 # Check we can get components 

339 if storageClass.isComposite(): 

340 self.assertGetComponents( 

341 butler, ref, ("summary", "data", "output"), metric, collections=this_run 

342 ) 

343 

344 primary_uri, secondary_uris = butler.getURIs(ref) 

345 n_uris = len(secondary_uris) 

346 if primary_uri: 

347 n_uris += 1 

348 

349 # Can the artifacts themselves be retrieved? 

350 if not butler._datastore.isEphemeral: 

351 # Create a temporary directory to hold the retrieved 

352 # artifacts. 

353 with tempfile.TemporaryDirectory( 

354 prefix="butler-artifacts-", ignore_cleanup_errors=True 

355 ) as artifact_root: 

356 root_uri = ResourcePath(artifact_root, forceDirectory=True) 

357 

358 for preserve_path in (True, False): 

359 destination = root_uri.join(f"{preserve_path}_{counter}/") 

360 log = logging.getLogger("lsst.x") 

361 log.warning("Using destination %s for args %s", destination, args) 

362 # Use copy so that we can test that overwrite 

363 # protection works (using "auto" for File URIs 

364 # would use hard links and subsequent transfer 

365 # would work because it knows they are the same 

366 # file). 

367 transferred = butler.retrieveArtifacts( 

368 [ref], destination, preserve_path=preserve_path, transfer="copy" 

369 ) 

370 self.assertGreater(len(transferred), 0) 

371 artifacts = list(ResourcePath.findFileResources([destination])) 

372 self.assertEqual(set(transferred), set(artifacts)) 

373 

374 for artifact in transferred: 

375 path_in_destination = artifact.relative_to(destination) 

376 self.assertIsNotNone(path_in_destination) 

377 assert path_in_destination is not None 

378 

379 # When path is not preserved there should not 

380 # be any path separators. 

381 num_seps = path_in_destination.count("/") 

382 if preserve_path: 

383 self.assertGreater(num_seps, 0) 

384 else: 

385 self.assertEqual(num_seps, 0) 

386 

387 self.assertEqual( 

388 len(artifacts), 

389 n_uris, 

390 "Comparing expected artifacts vs actual:" 

391 f" {artifacts} vs {primary_uri} and {secondary_uris}", 

392 ) 

393 

394 if preserve_path: 

395 # No need to run these twice 

396 with self.assertRaises(ValueError): 

397 butler.retrieveArtifacts([ref], destination, transfer="move") 

398 

399 with self.assertRaisesRegex( 

400 ValueError, "^Destination location must refer to a directory" 

401 ): 

402 butler.retrieveArtifacts( 

403 [ref], ResourcePath("/some/file.txt", forceDirectory=False) 

404 ) 

405 

406 with self.assertRaises(FileExistsError): 

407 butler.retrieveArtifacts([ref], destination) 

408 

409 transferred_again = butler.retrieveArtifacts( 

410 [ref], destination, preserve_path=preserve_path, overwrite=True 

411 ) 

412 self.assertEqual(set(transferred_again), set(transferred)) 

413 

414 # Now remove the dataset completely. 

415 butler.pruneDatasets([ref], purge=True, unstore=True) 

416 # Lookup with original args should still fail. 

417 kwargs = {"collections": this_run} 

418 if isinstance(args[0], DatasetRef): 

419 kwargs = {} # Prevent warning from being issued. 

420 self.assertFalse(butler.exists(*args, **kwargs)) 

421 # get() should still fail. 

422 with self.assertRaises((FileNotFoundError, DatasetNotFoundError)): 

423 butler.get(ref) 

424 # Registry shouldn't be able to find it by dataset_id anymore. 

425 self.assertIsNone(butler.get_dataset(ref.id)) 

426 

427 # Do explicit registry removal since we know they are 

428 # empty 

429 butler.registry.removeCollection(this_run) 

430 expected_collections.remove(this_run) 

431 

432 # Create DatasetRef for put using default run. 

433 refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run) 

434 

435 # Check that getDeferred fails with standalone ref. 

436 with self.assertRaises(LookupError): 

437 butler.getDeferred(refIn) 

438 

439 # Put the dataset again, since the last thing we did was remove it 

440 # and we want to use the default collection. 

441 ref = butler.put(metric, refIn) 

442 

443 # Get with parameters 

444 stop = 4 

445 sliced = butler.get(ref, parameters={"slice": slice(stop)}) 

446 self.assertNotEqual(metric, sliced) 

447 self.assertEqual(metric.summary, sliced.summary) 

448 self.assertEqual(metric.output, sliced.output) 

449 assert metric.data is not None # for mypy 

450 self.assertEqual(metric.data[:stop], sliced.data) 

451 # getDeferred with parameters 

452 sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get() 

453 self.assertNotEqual(metric, sliced) 

454 self.assertEqual(metric.summary, sliced.summary) 

455 self.assertEqual(metric.output, sliced.output) 

456 self.assertEqual(metric.data[:stop], sliced.data) 

457 # getDeferred with deferred parameters 

458 sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)}) 

459 self.assertNotEqual(metric, sliced) 

460 self.assertEqual(metric.summary, sliced.summary) 

461 self.assertEqual(metric.output, sliced.output) 

462 self.assertEqual(metric.data[:stop], sliced.data) 

463 

464 if storageClass.isComposite(): 

465 # Check that components can be retrieved 

466 metricOut = butler.get(ref.datasetType.name, dataId) 

467 compNameS = ref.datasetType.componentTypeName("summary") 

468 compNameD = ref.datasetType.componentTypeName("data") 

469 summary = butler.get(compNameS, dataId) 

470 self.assertEqual(summary, metric.summary) 

471 data = butler.get(compNameD, dataId) 

472 self.assertEqual(data, metric.data) 

473 

474 if "counter" in storageClass.derivedComponents: 

475 count = butler.get(ref.datasetType.componentTypeName("counter"), dataId) 

476 self.assertEqual(count, len(data)) 

477 

478 count = butler.get( 

479 ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)} 

480 ) 

481 self.assertEqual(count, stop) 

482 

483 compRef = butler.find_dataset(compNameS, dataId, collections=butler.collections) 

484 assert compRef is not None 

485 summary = butler.get(compRef) 

486 self.assertEqual(summary, metric.summary) 

487 

488 # Create a Dataset type that has the same name but is inconsistent. 

489 inconsistentDatasetType = DatasetType( 

490 datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config") 

491 ) 

492 

493 # Getting with a dataset type that does not match registry fails 

494 with self.assertRaisesRegex( 

495 ValueError, 

496 "(Supplied dataset type .* inconsistent with registry)" 

497 "|(The new storage class .* is not compatible with the existing storage class)", 

498 ): 

499 butler.get(inconsistentDatasetType, dataId) 

500 

501 # Combining a DatasetRef with a dataId should fail 

502 with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"): 

503 butler.get(ref, dataId) 

504 # Getting with an explicit ref should fail if the id doesn't match. 

505 with self.assertRaises((FileNotFoundError, DatasetNotFoundError)): 

506 butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run)) 

507 

508 # Getting a dataset with unknown parameters should fail 

509 with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"): 

510 butler.get(ref, parameters={"unsupported": True}) 

511 

512 # Check we have a collection 

513 collections = set(butler.registry.queryCollections()) 

514 self.assertEqual(collections, expected_collections) 

515 

516 # Clean up to check that we can remove something that may have 

517 # already had a component removed 

518 butler.pruneDatasets([ref], unstore=True, purge=True) 

519 

520 # Add the same ref again, so we can check that duplicate put fails. 

521 ref = butler.put(metric, datasetType, dataId) 

522 

523 # Repeat put will fail. 

524 with self.assertRaisesRegex( 

525 ConflictingDefinitionError, "A database constraint failure was triggered" 

526 ): 

527 butler.put(metric, datasetType, dataId) 

528 

529 # Remove the datastore entry. 

530 butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False) 

531 

532 # Put will still fail 

533 with self.assertRaisesRegex( 

534 ConflictingDefinitionError, "A database constraint failure was triggered" 

535 ): 

536 butler.put(metric, datasetType, dataId) 

537 

538 # Repeat the same sequence with resolved ref. 

539 butler.pruneDatasets([ref], unstore=True, purge=True) 

540 ref = butler.put(metric, refIn) 

541 

542 # Repeat put will fail. 

543 with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"): 

544 butler.put(metric, refIn) 

545 

546 # Remove the datastore entry. 

547 butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False) 

548 

549 # In case of resolved ref this write will succeed. 

550 ref = butler.put(metric, refIn) 

551 

552 # Leave the dataset in place since some downstream tests require 

553 # something to be present 

554 

555 return butler 

556 

557 def testDeferredCollectionPassing(self) -> None: 

558 # Construct a butler with no run or collection, but make it writeable. 

559 butler = self.create_empty_butler(writeable=True) 

560 # Create and register a DatasetType 

561 dimensions = butler.dimensions.conform(["instrument", "visit"]) 

562 datasetType = self.addDatasetType( 

563 "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry 

564 ) 

565 # Add needed Dimensions 

566 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

567 butler.registry.insertDimensionData( 

568 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

569 ) 

570 butler.registry.insertDimensionData("day_obs", {"instrument": "DummyCamComp", "id": 20250101}) 

571 butler.registry.insertDimensionData( 

572 "visit", 

573 { 

574 "instrument": "DummyCamComp", 

575 "id": 423, 

576 "name": "fourtwentythree", 

577 "physical_filter": "d-r", 

578 "day_obs": 20250101, 

579 }, 

580 ) 

581 dataId = {"instrument": "DummyCamComp", "visit": 423} 

582 # Create dataset. 

583 metric = makeExampleMetrics() 

584 # Register a new run and put dataset. 

585 run = "deferred" 

586 self.assertTrue(butler.registry.registerRun(run)) 

587 # Second time it will be allowed but indicate no-op 

588 self.assertFalse(butler.registry.registerRun(run)) 

589 ref = butler.put(metric, datasetType, dataId, run=run) 

590 # Putting with no run should fail with TypeError. 

591 with self.assertRaises(CollectionError): 

592 butler.put(metric, datasetType, dataId) 

593 # Dataset should exist. 

594 self.assertTrue(butler.exists(datasetType, dataId, collections=[run])) 

595 # We should be able to get the dataset back, but with and without 

596 # a deferred dataset handle. 

597 self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run])) 

598 self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get()) 

599 # Trying to find the dataset without any collection is an error. 

600 with self.assertRaises(NoDefaultCollectionError): 

601 butler.exists(datasetType, dataId) 

602 with self.assertRaises(CollectionError): 

603 butler.get(datasetType, dataId) 

604 # Associate the dataset with a different collection. 

605 butler.registry.registerCollection("tagged") 

606 butler.registry.associate("tagged", [ref]) 

607 # Deleting the dataset from the new collection should make it findable 

608 # in the original collection. 

609 butler.pruneDatasets([ref], tags=["tagged"]) 

610 self.assertTrue(butler.exists(datasetType, dataId, collections=[run])) 

611 

612 

613class ButlerTests(ButlerPutGetTests): 

614 """Tests for Butler.""" 

615 

616 useTempRoot = True 

617 validationCanFail: bool 

618 fullConfigKey: str | None 

619 registryStr: str | None 

620 datastoreName: list[str] | None 

621 datastoreStr: list[str] 

622 predictionSupported = True 

623 """Does getURIs support 'prediction mode'?""" 

624 

625 def setUp(self) -> None: 

626 """Create a new butler root for each test.""" 

627 self.root = makeTestTempDir(TESTDIR) 

628 Butler.makeRepo(self.root, config=Config(self.configFile)) 

629 self.tmpConfigFile = os.path.join(self.root, "butler.yaml") 

630 

631 def are_uris_equivalent(self, uri1: ResourcePath, uri2: ResourcePath) -> bool: 

632 """Return True if two URIs refer to the same resource. 

633 

634 Subclasses may override to handle unique requirements. 

635 """ 

636 return uri1 == uri2 

637 

638 def testConstructor(self) -> None: 

639 """Independent test of constructor.""" 

640 butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) 

641 self.assertIsInstance(butler, Butler) 

642 

643 # Check that butler.yaml is added automatically. 

644 if self.tmpConfigFile.endswith(end := "/butler.yaml"): 

645 config_dir = self.tmpConfigFile[: -len(end)] 

646 butler = Butler.from_config(config_dir, run=self.default_run) 

647 self.assertIsInstance(butler, Butler) 

648 

649 # Even with a ResourcePath. 

650 butler = Butler.from_config(ResourcePath(config_dir, forceDirectory=True), run=self.default_run) 

651 self.assertIsInstance(butler, Butler) 

652 

653 collections = set(butler.registry.queryCollections()) 

654 self.assertEqual(collections, {self.default_run}) 

655 

656 # Check that some special characters can be included in run name. 

657 special_run = "u@b.c-A" 

658 butler_special = Butler.from_config(butler=butler, run=special_run) 

659 collections = set(butler_special.registry.queryCollections("*@*")) 

660 self.assertEqual(collections, {special_run}) 

661 

662 butler2 = Butler.from_config(butler=butler, collections=["other"]) 

663 self.assertEqual(butler2.collections, ("other",)) 

664 self.assertIsNone(butler2.run) 

665 self.assertEqual(type(butler._datastore), type(butler2._datastore)) 

666 self.assertEqual(butler._datastore.config, butler2._datastore.config) 

667 

668 # Test that we can use an environment variable to find this 

669 # repository. 

670 butler_index = Config() 

671 butler_index["label"] = self.tmpConfigFile 

672 for suffix in (".yaml", ".json"): 

673 # Ensure that the content differs so that we know that 

674 # we aren't reusing the cache. 

675 bad_label = f"file://bucket/not_real{suffix}" 

676 butler_index["bad_label"] = bad_label 

677 with ResourcePath.temporary_uri(suffix=suffix) as temp_file: 

678 butler_index.dumpToUri(temp_file) 

679 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}): 

680 self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"}) 

681 uri = Butler.get_repo_uri("bad_label") 

682 self.assertEqual(uri, ResourcePath(bad_label)) 

683 uri = Butler.get_repo_uri("label") 

684 butler = Butler.from_config(uri, writeable=False) 

685 self.assertIsInstance(butler, Butler) 

686 butler = Butler.from_config("label", writeable=False) 

687 self.assertIsInstance(butler, Butler) 

688 with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"): 

689 Butler.from_config("not_there", writeable=False) 

690 with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"): 

691 Butler.from_config("bad_label") 

692 with self.assertRaises(FileNotFoundError): 

693 # Should ignore aliases. 

694 Butler.from_config(ResourcePath("label", forceAbsolute=False)) 

695 with self.assertRaises(KeyError) as cm: 

696 Butler.get_repo_uri("missing") 

697 self.assertEqual( 

698 Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False) 

699 ) 

700 self.assertIn("not known to", str(cm.exception)) 

701 # Should report no failure. 

702 self.assertEqual(ButlerRepoIndex.get_failure_reason(), "") 

703 with ResourcePath.temporary_uri(suffix=suffix) as temp_file: 

704 # Now with empty configuration. 

705 butler_index = Config() 

706 butler_index.dumpToUri(temp_file) 

707 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}): 

708 with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"): 

709 Butler.from_config("label") 

710 with ResourcePath.temporary_uri(suffix=suffix) as temp_file: 

711 # Now with bad contents. 

712 with open(temp_file.ospath, "w") as fh: 

713 print("'", file=fh) 

714 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}): 

715 with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"): 

716 Butler.from_config("label") 

717 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}): 

718 with self.assertRaises(FileNotFoundError): 

719 Butler.get_repo_uri("label") 

720 self.assertEqual(Butler.get_known_repos(), set()) 

721 

722 with self.assertRaisesRegex(FileNotFoundError, "index file not found"): 

723 Butler.from_config("label") 

724 

725 # Check that we can create Butler when the alias file is not found. 

726 butler = Butler.from_config(self.tmpConfigFile, writeable=False) 

727 self.assertIsInstance(butler, Butler) 

728 with self.assertRaises(RuntimeError) as cm: 

729 # No environment variable set. 

730 Butler.get_repo_uri("label") 

731 self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False)) 

732 self.assertIn("No repository index defined", str(cm.exception)) 

733 with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"): 

734 # No aliases registered. 

735 Butler.from_config("not_there") 

736 self.assertEqual(Butler.get_known_repos(), set()) 

737 

738 def testDafButlerRepositories(self): 

739 with unittest.mock.patch.dict( 

740 os.environ, 

741 {"DAF_BUTLER_REPOSITORIES": "label: 'https://someuri.com'\notherLabel: 'https://otheruri.com'\n"}, 

742 ): 

743 self.assertEqual(str(Butler.get_repo_uri("label")), "https://someuri.com") 

744 

745 with unittest.mock.patch.dict( 

746 os.environ, 

747 { 

748 "DAF_BUTLER_REPOSITORIES": "label: https://someuri.com", 

749 "DAF_BUTLER_REPOSITORY_INDEX": "https://someuri.com", 

750 }, 

751 ): 

752 with self.assertRaisesRegex(RuntimeError, "Only one of the environment variables"): 

753 Butler.get_repo_uri("label") 

754 

755 with unittest.mock.patch.dict( 

756 os.environ, 

757 {"DAF_BUTLER_REPOSITORIES": "invalid"}, 

758 ): 

759 with self.assertRaisesRegex(ValueError, "Repository index not in expected format"): 

760 Butler.get_repo_uri("label") 

761 

762 def testBasicPutGet(self) -> None: 

763 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

764 self.runPutGetTest(storageClass, "test_metric") 

765 

766 def testCompositePutGetConcrete(self) -> None: 

767 storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly") 

768 butler = self.runPutGetTest(storageClass, "test_metric") 

769 

770 # Should *not* be disassembled 

771 datasets = list(butler.registry.queryDatasets(..., collections=self.default_run)) 

772 self.assertEqual(len(datasets), 1) 

773 uri, components = butler.getURIs(datasets[0]) 

774 self.assertIsInstance(uri, ResourcePath) 

775 self.assertFalse(components) 

776 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}") 

777 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}") 

778 

779 # Predicted dataset 

780 if self.predictionSupported: 

781 dataId = {"instrument": "DummyCamComp", "visit": 424} 

782 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True) 

783 self.assertFalse(components) 

784 self.assertIsInstance(uri, ResourcePath) 

785 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}") 

786 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}") 

787 

788 def testCompositePutGetVirtual(self) -> None: 

789 storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp") 

790 butler = self.runPutGetTest(storageClass, "test_metric_comp") 

791 

792 # Should be disassembled 

793 datasets = list(butler.registry.queryDatasets(..., collections=self.default_run)) 

794 self.assertEqual(len(datasets), 1) 

795 uri, components = butler.getURIs(datasets[0]) 

796 

797 if butler._datastore.isEphemeral: 

798 # Never disassemble in-memory datastore 

799 self.assertIsInstance(uri, ResourcePath) 

800 self.assertFalse(components) 

801 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}") 

802 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}") 

803 else: 

804 self.assertIsNone(uri) 

805 self.assertEqual(set(components), set(storageClass.components)) 

806 for compuri in components.values(): 

807 self.assertIsInstance(compuri, ResourcePath) 

808 self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}") 

809 self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}") 

810 

811 if self.predictionSupported: 

812 # Predicted dataset 

813 dataId = {"instrument": "DummyCamComp", "visit": 424} 

814 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True) 

815 

816 if butler._datastore.isEphemeral: 

817 # Never disassembled 

818 self.assertIsInstance(uri, ResourcePath) 

819 self.assertFalse(components) 

820 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}") 

821 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}") 

822 else: 

823 self.assertIsNone(uri) 

824 self.assertEqual(set(components), set(storageClass.components)) 

825 for compuri in components.values(): 

826 self.assertIsInstance(compuri, ResourcePath) 

827 self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}") 

828 self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}") 

829 

830 def testStorageClassOverrideGet(self) -> None: 

831 """Test storage class conversion on get with override.""" 

832 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

833 datasetTypeName = "anything" 

834 run = self.default_run 

835 

836 butler, datasetType = self.create_butler(run, storageClass, datasetTypeName) 

837 

838 # Create and store a dataset. 

839 metric = makeExampleMetrics() 

840 dataId = {"instrument": "DummyCamComp", "visit": 423} 

841 

842 ref = butler.put(metric, datasetType, dataId) 

843 

844 # Return native type. 

845 retrieved = butler.get(ref) 

846 self.assertEqual(retrieved, metric) 

847 

848 # Specify an override. 

849 new_sc = self.storageClassFactory.getStorageClass("MetricsConversion") 

850 model = butler.get(ref, storageClass=new_sc) 

851 self.assertNotEqual(type(model), type(retrieved)) 

852 self.assertIs(type(model), new_sc.pytype) 

853 self.assertEqual(retrieved, model) 

854 

855 # Defer but override later. 

856 deferred = butler.getDeferred(ref) 

857 model = deferred.get(storageClass=new_sc) 

858 self.assertIs(type(model), new_sc.pytype) 

859 self.assertEqual(retrieved, model) 

860 

861 # Defer but override up front. 

862 deferred = butler.getDeferred(ref, storageClass=new_sc) 

863 model = deferred.get() 

864 self.assertIs(type(model), new_sc.pytype) 

865 self.assertEqual(retrieved, model) 

866 

867 # Retrieve a component. Should be a tuple. 

868 data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple") 

869 self.assertIs(type(data), tuple) 

870 self.assertEqual(data, tuple(retrieved.data)) 

871 

872 # Parameter on the write storage class should work regardless 

873 # of read storage class. 

874 data = butler.get( 

875 "anything.data", 

876 dataId, 

877 storageClass="StructuredDataDataTestTuple", 

878 parameters={"slice": slice(2, 4)}, 

879 ) 

880 self.assertEqual(len(data), 2) 

881 

882 # Try a parameter that is known to the read storage class but not 

883 # the write storage class. 

884 with self.assertRaises(KeyError): 

885 butler.get( 

886 "anything.data", 

887 dataId, 

888 storageClass="StructuredDataDataTestTuple", 

889 parameters={"xslice": slice(2, 4)}, 

890 ) 

891 

892 def testPytypePutCoercion(self) -> None: 

893 """Test python type coercion on Butler.get and put.""" 

894 # Store some data with the normal example storage class. 

895 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

896 datasetTypeName = "test_metric" 

897 butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName) 

898 

899 dataId = {"instrument": "DummyCamComp", "visit": 423} 

900 

901 # Put a dict and this should coerce to a MetricsExample 

902 test_dict = {"summary": {"a": 1}, "output": {"b": 2}} 

903 metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424) 

904 test_metric = butler.get(metric_ref) 

905 self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample") 

906 self.assertEqual(test_metric.summary, test_dict["summary"]) 

907 self.assertEqual(test_metric.output, test_dict["output"]) 

908 

909 # Check that the put still works if a DatasetType is given with 

910 # a definition matching this python type. 

911 registry_type = butler.get_dataset_type(datasetTypeName) 

912 this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson") 

913 metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425) 

914 self.assertEqual(metric2_ref.datasetType, registry_type) 

915 

916 # The get will return the type expected by registry. 

917 test_metric2 = butler.get(metric2_ref) 

918 self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample") 

919 

920 # Make a new DatasetRef with the compatible but different DatasetType. 

921 # This should now return a dict. 

922 new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run) 

923 test_dict2 = butler.get(new_ref) 

924 self.assertEqual(get_full_type_name(test_dict2), "dict") 

925 

926 # Get it again with the wrong dataset type definition using get() 

927 # rather than get(). This should be consistent with get() 

928 # behavior and return the type of the DatasetType. 

929 test_dict3 = butler.get(this_type, dataId=dataId, visit=425) 

930 self.assertEqual(get_full_type_name(test_dict3), "dict") 

931 

932 def testIngest(self) -> None: 

933 butler = self.create_empty_butler(run=self.default_run) 

934 

935 # Create and register a DatasetType 

936 dimensions = butler.dimensions.conform(["instrument", "visit", "detector"]) 

937 

938 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml") 

939 datasetTypeName = "metric" 

940 

941 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

942 

943 # Add needed Dimensions 

944 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

945 butler.registry.insertDimensionData( 

946 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

947 ) 

948 butler.registry.insertDimensionData("day_obs", {"instrument": "DummyCamComp", "id": 20250101}) 

949 for detector in (1, 2): 

950 butler.registry.insertDimensionData( 

951 "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"} 

952 ) 

953 

954 butler.registry.insertDimensionData( 

955 "visit", 

956 { 

957 "instrument": "DummyCamComp", 

958 "id": 423, 

959 "name": "fourtwentythree", 

960 "physical_filter": "d-r", 

961 "day_obs": 20250101, 

962 }, 

963 { 

964 "instrument": "DummyCamComp", 

965 "id": 424, 

966 "name": "fourtwentyfour", 

967 "physical_filter": "d-r", 

968 "day_obs": 20250101, 

969 }, 

970 ) 

971 

972 formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter") 

973 dataRoot = os.path.join(TESTDIR, "data", "basic") 

974 datasets = [] 

975 for detector in (1, 2): 

976 detector_name = f"detector_{detector}" 

977 metricFile = os.path.join(dataRoot, f"{detector_name}.yaml") 

978 dataId = butler.registry.expandDataId( 

979 {"instrument": "DummyCamComp", "visit": 423, "detector": detector} 

980 ) 

981 # Create a DatasetRef for ingest 

982 refIn = DatasetRef(datasetType, dataId, run=self.default_run) 

983 

984 datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter)) 

985 

986 butler.ingest(*datasets, transfer="copy") 

987 

988 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423} 

989 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423} 

990 

991 metrics1 = butler.get(datasetTypeName, dataId1) 

992 metrics2 = butler.get(datasetTypeName, dataId2) 

993 self.assertNotEqual(metrics1, metrics2) 

994 

995 # Compare URIs 

996 uri1 = butler.getURI(datasetTypeName, dataId1) 

997 uri2 = butler.getURI(datasetTypeName, dataId2) 

998 self.assertFalse(self.are_uris_equivalent(uri1, uri2), f"Cf. {uri1} with {uri2}") 

999 

1000 # Now do a multi-dataset but single file ingest 

1001 metricFile = os.path.join(dataRoot, "detectors.yaml") 

1002 refs = [] 

1003 for detector in (1, 2): 

1004 detector_name = f"detector_{detector}" 

1005 dataId = butler.registry.expandDataId( 

1006 {"instrument": "DummyCamComp", "visit": 424, "detector": detector} 

1007 ) 

1008 # Create a DatasetRef for ingest 

1009 refs.append(DatasetRef(datasetType, dataId, run=self.default_run)) 

1010 

1011 # Test "move" transfer to ensure that the files themselves 

1012 # have disappeared following ingest. 

1013 with ResourcePath.temporary_uri(suffix=".yaml") as tempFile: 

1014 tempFile.transfer_from(ResourcePath(metricFile), transfer="copy") 

1015 

1016 datasets = [] 

1017 datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter)) 

1018 

1019 # For first ingest use copy. 

1020 butler.ingest(*datasets, transfer="copy", record_validation_info=False) 

1021 

1022 # Now try to ingest again in "execution butler" mode where 

1023 # the registry entries exist but the datastore does not have 

1024 # the files. We also need to strip the dimension records to ensure 

1025 # that they will be re-added by the ingest. 

1026 ref = datasets[0].refs[0] 

1027 datasets[0].refs = [ 

1028 cast( 

1029 DatasetRef, 

1030 butler.find_dataset(ref.datasetType, data_id=ref.dataId, collections=ref.run), 

1031 ) 

1032 for ref in datasets[0].refs 

1033 ] 

1034 all_refs = [] 

1035 for dataset in datasets: 

1036 refs = [] 

1037 for ref in dataset.refs: 

1038 # Create a dict from the dataId to drop the records. 

1039 new_data_id = dict(ref.dataId.required) 

1040 new_ref = butler.find_dataset(ref.datasetType, new_data_id, collections=ref.run) 

1041 assert new_ref is not None 

1042 self.assertFalse(new_ref.dataId.hasRecords()) 

1043 refs.append(new_ref) 

1044 dataset.refs = refs 

1045 all_refs.extend(dataset.refs) 

1046 butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False) 

1047 

1048 # Use move mode to test that the file is deleted. Also 

1049 # disable recording of file size. 

1050 butler.ingest(*datasets, transfer="move", record_validation_info=False) 

1051 

1052 # Check that every ref now has records. 

1053 for dataset in datasets: 

1054 for ref in dataset.refs: 

1055 self.assertTrue(ref.dataId.hasRecords()) 

1056 

1057 # Ensure that the file has disappeared. 

1058 self.assertFalse(tempFile.exists()) 

1059 

1060 # Check that the datastore recorded no file size. 

1061 # Not all datastores can support this. 

1062 try: 

1063 infos = butler._datastore.getStoredItemsInfo(datasets[0].refs[0]) # type: ignore[attr-defined] 

1064 self.assertEqual(infos[0].file_size, -1) 

1065 except AttributeError: 

1066 pass 

1067 

1068 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424} 

1069 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424} 

1070 

1071 multi1 = butler.get(datasetTypeName, dataId1) 

1072 multi2 = butler.get(datasetTypeName, dataId2) 

1073 

1074 self.assertEqual(multi1, metrics1) 

1075 self.assertEqual(multi2, metrics2) 

1076 

1077 # Compare URIs 

1078 uri1 = butler.getURI(datasetTypeName, dataId1) 

1079 uri2 = butler.getURI(datasetTypeName, dataId2) 

1080 self.assertTrue(self.are_uris_equivalent(uri1, uri2), f"Cf. {uri1} with {uri2}") 

1081 

1082 # Test that removing one does not break the second 

1083 # This line will issue a warning log message for a ChainedDatastore 

1084 # that uses an InMemoryDatastore since in-memory can not ingest 

1085 # files. 

1086 butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False) 

1087 self.assertFalse(butler.exists(datasetTypeName, dataId1)) 

1088 self.assertTrue(butler.exists(datasetTypeName, dataId2)) 

1089 multi2b = butler.get(datasetTypeName, dataId2) 

1090 self.assertEqual(multi2, multi2b) 

1091 

1092 # Ensure we can ingest 0 datasets 

1093 datasets = [] 

1094 butler.ingest(*datasets) 

1095 

1096 def testPickle(self) -> None: 

1097 """Test pickle support.""" 

1098 butler = self.create_empty_butler(run=self.default_run) 

1099 assert isinstance(butler, DirectButler), "Expect DirectButler in configuration" 

1100 butlerOut = pickle.loads(pickle.dumps(butler)) 

1101 self.assertIsInstance(butlerOut, Butler) 

1102 self.assertEqual(butlerOut._config, butler._config) 

1103 self.assertEqual(butlerOut.collections, butler.collections) 

1104 self.assertEqual(butlerOut.run, butler.run) 

1105 

1106 def testGetDatasetTypes(self) -> None: 

1107 butler = self.create_empty_butler(run=self.default_run) 

1108 dimensions = butler.dimensions.conform(["instrument", "visit", "physical_filter"]) 

1109 dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [ 

1110 ( 

1111 "instrument", 

1112 [ 

1113 {"instrument": "DummyCam"}, 

1114 {"instrument": "DummyHSC"}, 

1115 {"instrument": "DummyCamComp"}, 

1116 ], 

1117 ), 

1118 ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]), 

1119 ("day_obs", [{"instrument": "DummyCam", "id": 20250101}]), 

1120 ( 

1121 "visit", 

1122 [ 

1123 { 

1124 "instrument": "DummyCam", 

1125 "id": 42, 

1126 "name": "fortytwo", 

1127 "physical_filter": "d-r", 

1128 "day_obs": 20250101, 

1129 } 

1130 ], 

1131 ), 

1132 ] 

1133 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1134 # Add needed Dimensions 

1135 for element, data in dimensionEntries: 

1136 butler.registry.insertDimensionData(element, *data) 

1137 

1138 # When a DatasetType is added to the registry entries are not created 

1139 # for components but querying them can return the components. 

1140 datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"} 

1141 components = set() 

1142 for datasetTypeName in datasetTypeNames: 

1143 # Create and register a DatasetType 

1144 self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

1145 

1146 for componentName in storageClass.components: 

1147 components.add(DatasetType.nameWithComponent(datasetTypeName, componentName)) 

1148 

1149 fromRegistry: set[DatasetType] = set() 

1150 for parent_dataset_type in butler.registry.queryDatasetTypes(): 

1151 fromRegistry.add(parent_dataset_type) 

1152 fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes()) 

1153 self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components) 

1154 

1155 # Now that we have some dataset types registered, validate them 

1156 butler.validateConfiguration( 

1157 ignore=[ 

1158 "test_metric_comp", 

1159 "metric3", 

1160 "metric5", 

1161 "calexp", 

1162 "DummySC", 

1163 "datasetType.component", 

1164 "random_data", 

1165 "random_data_2", 

1166 ] 

1167 ) 

1168 

1169 # Add a new datasetType that will fail template validation 

1170 self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry) 

1171 if self.validationCanFail: 

1172 with self.assertRaises(ValidationError): 

1173 butler.validateConfiguration() 

1174 

1175 # Rerun validation but with a subset of dataset type names 

1176 butler.validateConfiguration(datasetTypeNames=["metric4"]) 

1177 

1178 # Rerun validation but ignore the bad datasetType 

1179 butler.validateConfiguration( 

1180 ignore=[ 

1181 "test_metric_comp", 

1182 "metric3", 

1183 "metric5", 

1184 "calexp", 

1185 "DummySC", 

1186 "datasetType.component", 

1187 "random_data", 

1188 "random_data_2", 

1189 ] 

1190 ) 

1191 

1192 def testTransaction(self) -> None: 

1193 butler = self.create_empty_butler(run=self.default_run) 

1194 datasetTypeName = "test_metric" 

1195 dimensions = butler.dimensions.conform(["instrument", "visit"]) 

1196 dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = ( 

1197 ("instrument", {"instrument": "DummyCam"}), 

1198 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}), 

1199 ("day_obs", {"instrument": "DummyCam", "id": 20250101}), 

1200 ( 

1201 "visit", 

1202 { 

1203 "instrument": "DummyCam", 

1204 "id": 42, 

1205 "name": "fortytwo", 

1206 "physical_filter": "d-r", 

1207 "day_obs": 20250101, 

1208 }, 

1209 ), 

1210 ) 

1211 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1212 metric = makeExampleMetrics() 

1213 dataId = {"instrument": "DummyCam", "visit": 42} 

1214 # Create and register a DatasetType 

1215 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

1216 with self.assertRaises(TransactionTestError): 

1217 with butler.transaction(): 

1218 # Add needed Dimensions 

1219 for args in dimensionEntries: 

1220 butler.registry.insertDimensionData(*args) 

1221 # Store a dataset 

1222 ref = butler.put(metric, datasetTypeName, dataId) 

1223 self.assertIsInstance(ref, DatasetRef) 

1224 # Test get of a ref. 

1225 metricOut = butler.get(ref) 

1226 self.assertEqual(metric, metricOut) 

1227 # Test get 

1228 metricOut = butler.get(datasetTypeName, dataId) 

1229 self.assertEqual(metric, metricOut) 

1230 # Check we can get components 

1231 self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric) 

1232 raise TransactionTestError("This should roll back the entire transaction") 

1233 with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"): 

1234 butler.registry.expandDataId(dataId) 

1235 # Should raise LookupError for missing data ID value 

1236 with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"): 

1237 butler.get(datasetTypeName, dataId) 

1238 # Also check explicitly if Dataset entry is missing 

1239 self.assertIsNone(butler.find_dataset(datasetType, dataId, collections=butler.collections)) 

1240 # Direct retrieval should not find the file in the Datastore 

1241 with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"): 

1242 butler.get(ref) 

1243 

1244 def testMakeRepo(self) -> None: 

1245 """Test that we can write butler configuration to a new repository via 

1246 the Butler.makeRepo interface and then instantiate a butler from the 

1247 repo root. 

1248 """ 

1249 # Do not run the test if we know this datastore configuration does 

1250 # not support a file system root 

1251 if self.fullConfigKey is None: 

1252 return 

1253 

1254 # create two separate directories 

1255 root1 = tempfile.mkdtemp(dir=self.root) 

1256 root2 = tempfile.mkdtemp(dir=self.root) 

1257 

1258 butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile)) 

1259 limited = Config(self.configFile) 

1260 butler1 = Butler.from_config(butlerConfig) 

1261 assert isinstance(butler1, DirectButler), "Expect DirectButler in configuration" 

1262 butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile)) 

1263 full = Config(self.tmpConfigFile) 

1264 butler2 = Butler.from_config(butlerConfig) 

1265 assert isinstance(butler2, DirectButler), "Expect DirectButler in configuration" 

1266 # Butlers should have the same configuration regardless of whether 

1267 # defaults were expanded. 

1268 self.assertEqual(butler1._config, butler2._config) 

1269 # Config files loaded directly should not be the same. 

1270 self.assertNotEqual(limited, full) 

1271 # Make sure "limited" doesn't have a few keys we know it should be 

1272 # inheriting from defaults. 

1273 self.assertIn(self.fullConfigKey, full) 

1274 self.assertNotIn(self.fullConfigKey, limited) 

1275 

1276 # Collections don't appear until something is put in them 

1277 collections1 = set(butler1.registry.queryCollections()) 

1278 self.assertEqual(collections1, set()) 

1279 self.assertEqual(set(butler2.registry.queryCollections()), collections1) 

1280 

1281 # Check that a config with no associated file name will not 

1282 # work properly with relocatable Butler repo 

1283 butlerConfig.configFile = None 

1284 with self.assertRaises(ValueError): 

1285 Butler.from_config(butlerConfig) 

1286 

1287 with self.assertRaises(FileExistsError): 

1288 Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False) 

1289 

1290 def testStringification(self) -> None: 

1291 butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) 

1292 butlerStr = str(butler) 

1293 

1294 if self.datastoreStr is not None: 

1295 for testStr in self.datastoreStr: 

1296 self.assertIn(testStr, butlerStr) 

1297 if self.registryStr is not None: 

1298 self.assertIn(self.registryStr, butlerStr) 

1299 

1300 datastoreName = butler._datastore.name 

1301 if self.datastoreName is not None: 

1302 for testStr in self.datastoreName: 

1303 self.assertIn(testStr, datastoreName) 

1304 

1305 def testButlerRewriteDataId(self) -> None: 

1306 """Test that dataIds can be rewritten based on dimension records.""" 

1307 butler = self.create_empty_butler(run=self.default_run) 

1308 

1309 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1310 datasetTypeName = "random_data" 

1311 

1312 # Create dimension records. 

1313 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1314 butler.registry.insertDimensionData( 

1315 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1316 ) 

1317 butler.registry.insertDimensionData( 

1318 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

1319 ) 

1320 

1321 dimensions = butler.dimensions.conform(["instrument", "exposure"]) 

1322 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

1323 butler.registry.registerDatasetType(datasetType) 

1324 

1325 n_exposures = 5 

1326 dayobs = 20210530 

1327 

1328 butler.registry.insertDimensionData("day_obs", {"instrument": "DummyCamComp", "id": dayobs}) 

1329 

1330 for i in range(n_exposures): 

1331 butler.registry.insertDimensionData("group", {"instrument": "DummyCamComp", "name": f"group{i}"}) 

1332 butler.registry.insertDimensionData( 

1333 "exposure", 

1334 { 

1335 "instrument": "DummyCamComp", 

1336 "id": i, 

1337 "obs_id": f"exp{i}", 

1338 "seq_num": i, 

1339 "day_obs": dayobs, 

1340 "physical_filter": "d-r", 

1341 "group": f"group{i}", 

1342 }, 

1343 ) 

1344 

1345 # Write some data. 

1346 for i in range(n_exposures): 

1347 metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]} 

1348 

1349 # Use the seq_num for the put to test rewriting. 

1350 dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

1351 ref = butler.put(metric, datasetTypeName, dataId=dataId) 

1352 

1353 # Check that the exposure is correct in the dataId 

1354 self.assertEqual(ref.dataId["exposure"], i) 

1355 

1356 # and check that we can get the dataset back with the same dataId 

1357 new_metric = butler.get(datasetTypeName, dataId=dataId) 

1358 self.assertEqual(new_metric, metric) 

1359 

1360 # Check that we can find the datasets using the day_obs or the 

1361 # exposure.day_obs. 

1362 datasets_1 = list( 

1363 butler.registry.queryDatasets( 

1364 datasetType, 

1365 collections=self.default_run, 

1366 where="day_obs = dayObs AND instrument = instr", 

1367 bind={"dayObs": dayobs, "instr": "DummyCamComp"}, 

1368 ) 

1369 ) 

1370 datasets_2 = list( 

1371 butler.registry.queryDatasets( 

1372 datasetType, 

1373 collections=self.default_run, 

1374 where="exposure.day_obs = dayObs AND instrument = instr", 

1375 bind={"dayObs": dayobs, "instr": "DummyCamComp"}, 

1376 ) 

1377 ) 

1378 self.assertEqual(datasets_1, datasets_2) 

1379 

1380 def testGetDatasetCollectionCaching(self): 

1381 # Prior to DM-41117, there was a bug where get_dataset would throw 

1382 # MissingCollectionError if you tried to fetch a dataset that was added 

1383 # after the collection cache was last updated. 

1384 reader_butler, datasetType = self.create_butler(self.default_run, "int", "datasettypename") 

1385 writer_butler = self.create_empty_butler(writeable=True, run="new_run") 

1386 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1387 put_ref = writer_butler.put(123, datasetType, dataId) 

1388 get_ref = reader_butler.get_dataset(put_ref.id) 

1389 self.assertEqual(get_ref.id, put_ref.id) 

1390 

1391 def testCollectionChainRedefine(self): 

1392 butler = self._setup_to_test_collection_chain() 

1393 

1394 butler.collection_chains.redefine_chain("chain", "a") 

1395 self._check_chain(butler, ["a"]) 

1396 

1397 # Duplicates are removed from the list of children 

1398 butler.collection_chains.redefine_chain("chain", ["c", "b", "c"]) 

1399 self._check_chain(butler, ["c", "b"]) 

1400 

1401 # Empty list clears the chain 

1402 butler.collection_chains.redefine_chain("chain", []) 

1403 self._check_chain(butler, []) 

1404 

1405 self._test_common_chain_functionality(butler, butler.collection_chains.redefine_chain) 

1406 

1407 def testCollectionChainPrepend(self): 

1408 butler = self._setup_to_test_collection_chain() 

1409 

1410 # Duplicates are removed from the list of children 

1411 butler.collection_chains.prepend_chain("chain", ["c", "b", "c"]) 

1412 self._check_chain(butler, ["c", "b"]) 

1413 

1414 # Prepend goes on the front of existing chain 

1415 butler.collection_chains.prepend_chain("chain", ["a"]) 

1416 self._check_chain(butler, ["a", "c", "b"]) 

1417 

1418 # Empty prepend does nothing 

1419 butler.collection_chains.prepend_chain("chain", []) 

1420 self._check_chain(butler, ["a", "c", "b"]) 

1421 

1422 # Prepending children that already exist in the chain removes them from 

1423 # their current position. 

1424 butler.collection_chains.prepend_chain("chain", ["d", "b", "c"]) 

1425 self._check_chain(butler, ["d", "b", "c", "a"]) 

1426 

1427 self._test_common_chain_functionality(butler, butler.collection_chains.prepend_chain) 

1428 

1429 def testCollectionChainExtend(self): 

1430 butler = self._setup_to_test_collection_chain() 

1431 

1432 # Duplicates are removed from the list of children 

1433 butler.collection_chains.extend_chain("chain", ["c", "b", "c"]) 

1434 self._check_chain(butler, ["c", "b"]) 

1435 

1436 # Extend goes on the end of existing chain 

1437 butler.collection_chains.extend_chain("chain", ["a"]) 

1438 self._check_chain(butler, ["c", "b", "a"]) 

1439 

1440 # Empty extend does nothing 

1441 butler.collection_chains.extend_chain("chain", []) 

1442 self._check_chain(butler, ["c", "b", "a"]) 

1443 

1444 # Extending children that already exist in the chain removes them from 

1445 # their current position. 

1446 butler.collection_chains.extend_chain("chain", ["d", "b", "c"]) 

1447 self._check_chain(butler, ["a", "d", "b", "c"]) 

1448 

1449 self._test_common_chain_functionality(butler, butler.collection_chains.extend_chain) 

1450 

1451 def testCollectionChainRemove(self) -> None: 

1452 butler = self._setup_to_test_collection_chain() 

1453 

1454 butler.registry.setCollectionChain("chain", ["a", "b", "c", "d"]) 

1455 

1456 butler.collection_chains.remove_from_chain("chain", "c") 

1457 self._check_chain(butler, ["a", "b", "d"]) 

1458 

1459 # Duplicates are allowed in the list of children 

1460 butler.collection_chains.remove_from_chain("chain", ["b", "b", "a"]) 

1461 self._check_chain(butler, ["d"]) 

1462 

1463 # Empty remove does nothing 

1464 butler.collection_chains.remove_from_chain("chain", []) 

1465 self._check_chain(butler, ["d"]) 

1466 

1467 # Removing children that aren't in the chain does nothing 

1468 butler.collection_chains.remove_from_chain("chain", ["a", "chain"]) 

1469 self._check_chain(butler, ["d"]) 

1470 

1471 self._test_common_chain_functionality( 

1472 butler, butler.collection_chains.remove_from_chain, skip_cycle_check=True 

1473 ) 

1474 

1475 def _setup_to_test_collection_chain(self) -> Butler: 

1476 butler = self.create_empty_butler(writeable=True) 

1477 

1478 butler.registry.registerCollection("chain", CollectionType.CHAINED) 

1479 

1480 runs = ["a", "b", "c", "d"] 

1481 for run in runs: 

1482 butler.registry.registerCollection(run) 

1483 

1484 butler.registry.registerCollection("staticchain", CollectionType.CHAINED) 

1485 butler.registry.setCollectionChain("staticchain", ["a", "b"]) 

1486 

1487 return butler 

1488 

1489 def _check_chain(self, butler: Butler, expected: list[str]) -> None: 

1490 children = butler.registry.getCollectionChain("chain") 

1491 self.assertEqual(expected, list(children)) 

1492 

1493 def _test_common_chain_functionality( 

1494 self, butler, func: Callable[[str, str | list[str]], Any], *, skip_cycle_check=False 

1495 ) -> None: 

1496 # Missing parent collection 

1497 with self.assertRaises(MissingCollectionError): 

1498 func("doesnotexist", []) 

1499 # Missing child collection 

1500 with self.assertRaises(MissingCollectionError): 

1501 func("chain", ["doesnotexist"]) 

1502 # Forbid operations on non-chained collections 

1503 with self.assertRaises(CollectionTypeError): 

1504 func("d", ["a"]) 

1505 

1506 # Prevent collection cycles 

1507 if not skip_cycle_check: 

1508 butler.registry.registerCollection("chain2", CollectionType.CHAINED) 

1509 func("chain2", "chain") 

1510 with self.assertRaises(CollectionCycleError): 

1511 func("chain", "chain2") 

1512 

1513 # Make sure none of the earlier operations interfered with unrelated 

1514 # chains. 

1515 self.assertEqual(["a", "b"], list(butler.registry.getCollectionChain("staticchain"))) 

1516 

1517 with butler._caching_context(): 

1518 with self.assertRaisesRegex(RuntimeError, "Chained collection modification not permitted"): 

1519 func("chain", "a") 

1520 

1521 

1522class FileDatastoreButlerTests(ButlerTests): 

1523 """Common tests and specialization of ButlerTests for butlers backed 

1524 by datastores that inherit from FileDatastore. 

1525 """ 

1526 

1527 trustModeSupported = True 

1528 

1529 def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool: 

1530 """Check if file exists at a given path (relative to root). 

1531 

1532 Test testPutTemplates verifies actual physical existance of the files 

1533 in the requested location. 

1534 """ 

1535 uri = ResourcePath(root, forceDirectory=True) 

1536 return uri.join(relpath).exists() 

1537 

1538 def testPutTemplates(self) -> None: 

1539 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1540 butler = self.create_empty_butler(run=self.default_run) 

1541 

1542 # Add needed Dimensions 

1543 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1544 butler.registry.insertDimensionData( 

1545 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1546 ) 

1547 butler.registry.insertDimensionData("day_obs", {"instrument": "DummyCamComp", "id": 20250101}) 

1548 butler.registry.insertDimensionData( 

1549 "visit", 

1550 { 

1551 "instrument": "DummyCamComp", 

1552 "id": 423, 

1553 "name": "v423", 

1554 "physical_filter": "d-r", 

1555 "day_obs": 20250101, 

1556 }, 

1557 ) 

1558 butler.registry.insertDimensionData( 

1559 "visit", 

1560 { 

1561 "instrument": "DummyCamComp", 

1562 "id": 425, 

1563 "name": "v425", 

1564 "physical_filter": "d-r", 

1565 "day_obs": 20250101, 

1566 }, 

1567 ) 

1568 

1569 # Create and store a dataset 

1570 metric = makeExampleMetrics() 

1571 

1572 # Create two almost-identical DatasetTypes (both will use default 

1573 # template) 

1574 dimensions = butler.dimensions.conform(["instrument", "visit"]) 

1575 butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass)) 

1576 butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass)) 

1577 butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass)) 

1578 

1579 dataId1 = {"instrument": "DummyCamComp", "visit": 423} 

1580 dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"} 

1581 

1582 # Put with exactly the data ID keys needed 

1583 ref = butler.put(metric, "metric1", dataId1) 

1584 uri = butler.getURI(ref) 

1585 self.assertTrue(uri.exists()) 

1586 self.assertTrue( 

1587 uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle") 

1588 ) 

1589 

1590 # Check the template based on dimensions 

1591 if hasattr(butler._datastore, "templates"): 

1592 butler._datastore.templates.validateTemplates([ref]) 

1593 

1594 # Put with extra data ID keys (physical_filter is an optional 

1595 # dependency); should not change template (at least the way we're 

1596 # defining them to behave now; the important thing is that they 

1597 # must be consistent). 

1598 ref = butler.put(metric, "metric2", dataId2) 

1599 uri = butler.getURI(ref) 

1600 self.assertTrue(uri.exists()) 

1601 self.assertTrue( 

1602 uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle") 

1603 ) 

1604 

1605 # Check the template based on dimensions 

1606 if hasattr(butler._datastore, "templates"): 

1607 butler._datastore.templates.validateTemplates([ref]) 

1608 

1609 # Use a template that has a typo in dimension record metadata. 

1610 # Easier to test with a butler that has a ref with records attached. 

1611 template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits") 

1612 with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"): 

1613 path = template.format(ref) 

1614 self.assertEqual(path, f"a/v423/{ref.id}_fits") 

1615 

1616 template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits") 

1617 with self.assertRaises(KeyError): 

1618 with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"): 

1619 template.format(ref) 

1620 

1621 # Now use a file template that will not result in unique filenames 

1622 with self.assertRaises(FileTemplateValidationError): 

1623 butler.put(metric, "metric3", dataId1) 

1624 

1625 def testImportExport(self) -> None: 

1626 # Run put/get tests just to create and populate a repo. 

1627 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1628 self.runImportExportTest(storageClass) 

1629 

1630 @unittest.expectedFailure 

1631 def testImportExportVirtualComposite(self) -> None: 

1632 # Run put/get tests just to create and populate a repo. 

1633 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

1634 self.runImportExportTest(storageClass) 

1635 

1636 def runImportExportTest(self, storageClass: StorageClass) -> None: 

1637 """Test exporting and importing. 

1638 

1639 This test does an export to a temp directory and an import back 

1640 into a new temp directory repo. It does not assume a posix datastore. 

1641 """ 

1642 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1643 

1644 # Test that we must have a file extension. 

1645 with self.assertRaises(ValueError): 

1646 with exportButler.export(filename="dump", directory=".") as export: 

1647 pass 

1648 

1649 # Test that unknown format is not allowed. 

1650 with self.assertRaises(ValueError): 

1651 with exportButler.export(filename="dump.fits", directory=".") as export: 

1652 pass 

1653 

1654 # Test that the repo actually has at least one dataset. 

1655 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1656 self.assertGreater(len(datasets), 0) 

1657 # Add a DimensionRecord that's unused by those datasets. 

1658 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")} 

1659 exportButler.registry.insertDimensionData("skymap", skymapRecord) 

1660 # Export and then import datasets. 

1661 with safeTestTempDir(TESTDIR) as exportDir: 

1662 exportFile = os.path.join(exportDir, "exports.yaml") 

1663 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export: 

1664 export.saveDatasets(datasets) 

1665 # Export the same datasets again. This should quietly do 

1666 # nothing because of internal deduplication, and it shouldn't 

1667 # complain about being asked to export the "htm7" elements even 

1668 # though there aren't any in these datasets or in the database. 

1669 export.saveDatasets(datasets, elements=["htm7"]) 

1670 # Save one of the data IDs again; this should be harmless 

1671 # because of internal deduplication. 

1672 export.saveDataIds([datasets[0].dataId]) 

1673 # Save some dimension records directly. 

1674 export.saveDimensionData("skymap", [skymapRecord]) 

1675 self.assertTrue(os.path.exists(exportFile)) 

1676 with safeTestTempDir(TESTDIR) as importDir: 

1677 # We always want this to be a local posix butler 

1678 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml"))) 

1679 # Calling script.butlerImport tests the implementation of the 

1680 # butler command line interface "import" subcommand. Functions 

1681 # in the script folder are generally considered protected and 

1682 # should not be used as public api. 

1683 with open(exportFile) as f: 

1684 script.butlerImport( 

1685 importDir, 

1686 export_file=f, 

1687 directory=exportDir, 

1688 transfer="auto", 

1689 skip_dimensions=None, 

1690 ) 

1691 importButler = Butler.from_config(importDir, run=self.default_run) 

1692 for ref in datasets: 

1693 with self.subTest(ref=ref): 

1694 # Test for existence by passing in the DatasetType and 

1695 # data ID separately, to avoid lookup by dataset_id. 

1696 self.assertTrue(importButler.exists(ref.datasetType, ref.dataId)) 

1697 self.assertEqual( 

1698 list(importButler.registry.queryDimensionRecords("skymap")), 

1699 [importButler.dimensions["skymap"].RecordClass(**skymapRecord)], 

1700 ) 

1701 

1702 def testRemoveRuns(self) -> None: 

1703 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1704 butler = self.create_empty_butler(writeable=True) 

1705 # Load registry data with dimensions to hang datasets off of. 

1706 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1707 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1708 # Add some RUN-type collection. 

1709 run1 = "run1" 

1710 butler.registry.registerRun(run1) 

1711 run2 = "run2" 

1712 butler.registry.registerRun(run2) 

1713 # put a dataset in each 

1714 metric = makeExampleMetrics() 

1715 dimensions = butler.dimensions.conform(["instrument", "physical_filter"]) 

1716 datasetType = self.addDatasetType( 

1717 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1718 ) 

1719 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1720 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1721 uri1 = butler.getURI(ref1) 

1722 uri2 = butler.getURI(ref2) 

1723 

1724 with self.assertRaises(OrphanedRecordError): 

1725 butler.registry.removeDatasetType(datasetType.name) 

1726 

1727 # Remove from both runs with different values for unstore. 

1728 butler.removeRuns([run1], unstore=True) 

1729 butler.removeRuns([run2], unstore=False) 

1730 # Should be nothing in registry for either one, and datastore should 

1731 # not think either exists. 

1732 with self.assertRaises(MissingCollectionError): 

1733 butler.registry.getCollectionType(run1) 

1734 with self.assertRaises(MissingCollectionError): 

1735 butler.registry.getCollectionType(run2) 

1736 self.assertFalse(butler.stored(ref1)) 

1737 self.assertFalse(butler.stored(ref2)) 

1738 # The ref we unstored should be gone according to the URI, but the 

1739 # one we forgot should still be around. 

1740 self.assertFalse(uri1.exists()) 

1741 self.assertTrue(uri2.exists()) 

1742 

1743 # Now that the collections have been pruned we can remove the 

1744 # dataset type 

1745 butler.registry.removeDatasetType(datasetType.name) 

1746 

1747 with self.assertLogs("lsst.daf.butler.registry", "INFO") as cm: 

1748 butler.registry.removeDatasetType(("test*", "test*")) 

1749 self.assertIn("not defined", "\n".join(cm.output)) 

1750 

1751 def remove_dataset_out_of_band(self, butler: Butler, ref: DatasetRef) -> None: 

1752 """Simulate an external actor removing a file outside of Butler's 

1753 knowledge. 

1754 

1755 Subclasses may override to handle more complicated datastore 

1756 configurations. 

1757 """ 

1758 uri = butler.getURI(ref) 

1759 uri.remove() 

1760 datastore = cast(FileDatastore, butler._datastore) 

1761 datastore.cacheManager.remove_from_cache(ref) 

1762 

1763 def testPruneDatasets(self) -> None: 

1764 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1765 butler = self.create_empty_butler(writeable=True) 

1766 # Load registry data with dimensions to hang datasets off of. 

1767 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1768 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1769 # Add some RUN-type collections. 

1770 run1 = "run1" 

1771 butler.registry.registerRun(run1) 

1772 run2 = "run2" 

1773 butler.registry.registerRun(run2) 

1774 # put some datasets. ref1 and ref2 have the same data ID, and are in 

1775 # different runs. ref3 has a different data ID. 

1776 metric = makeExampleMetrics() 

1777 dimensions = butler.dimensions.conform(["instrument", "physical_filter"]) 

1778 datasetType = self.addDatasetType( 

1779 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1780 ) 

1781 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1782 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1783 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1784 

1785 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1786 for ref, stored in many_stored.items(): 

1787 self.assertTrue(stored, f"Ref {ref} should be stored") 

1788 

1789 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1790 for ref, exists in many_exists.items(): 

1791 self.assertTrue(exists, f"Checking ref {ref} exists.") 

1792 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored") 

1793 

1794 # Simple prune. 

1795 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1796 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1)) 

1797 

1798 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1799 for ref, stored in many_stored.items(): 

1800 self.assertFalse(stored, f"Ref {ref} should not be stored") 

1801 

1802 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1803 for ref, exists in many_exists.items(): 

1804 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored") 

1805 

1806 # Put data back. 

1807 ref1_new = butler.put(metric, ref1) 

1808 self.assertEqual(ref1_new, ref1) # Reuses original ID. 

1809 ref2 = butler.put(metric, ref2) 

1810 

1811 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1812 self.assertTrue(many_stored[ref1]) 

1813 self.assertTrue(many_stored[ref2]) 

1814 self.assertFalse(many_stored[ref3]) 

1815 

1816 ref3 = butler.put(metric, ref3) 

1817 

1818 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1819 for ref, exists in many_exists.items(): 

1820 self.assertTrue(exists, f"Ref {ref} should not be stored") 

1821 

1822 # Clear out the datasets from registry and start again. 

1823 refs = [ref1, ref2, ref3] 

1824 butler.pruneDatasets(refs, purge=True, unstore=True) 

1825 for ref in refs: 

1826 butler.put(metric, ref) 

1827 

1828 # Confirm we can retrieve deferred. 

1829 dref1 = butler.getDeferred(ref1) # known and exists 

1830 metric1 = dref1.get() 

1831 self.assertEqual(metric1, metric) 

1832 

1833 # Test different forms of file availability. 

1834 # Need to be in a state where: 

1835 # - one ref just has registry record. 

1836 # - one ref has a missing file but a datastore record. 

1837 # - one ref has a missing datastore record but file is there. 

1838 # - one ref does not exist anywhere. 

1839 # Do not need to test a ref that has everything since that is tested 

1840 # above. 

1841 ref0 = DatasetRef( 

1842 datasetType, 

1843 DataCoordinate.standardize( 

1844 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions 

1845 ), 

1846 run=run1, 

1847 ) 

1848 

1849 # Delete from datastore and retain in Registry. 

1850 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False) 

1851 

1852 # File has been removed. 

1853 self.remove_dataset_out_of_band(butler, ref2) 

1854 

1855 # Datastore has lost track. 

1856 butler._datastore.forget([ref3]) 

1857 

1858 # First test with a standard butler. 

1859 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1860 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1861 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1862 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1863 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED) 

1864 

1865 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False) 

1866 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1867 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1868 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN) 

1869 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1870 self.assertTrue(exists_many[ref2]) 

1871 

1872 # Check that per-ref query gives the same answer as many query. 

1873 for ref, exists in exists_many.items(): 

1874 self.assertEqual(butler.exists(ref, full_check=False), exists) 

1875 

1876 # Get deferred checks for existence before it allows it to be 

1877 # retrieved. 

1878 with self.assertRaises(LookupError): 

1879 butler.getDeferred(ref3) # not known, file exists 

1880 dref2 = butler.getDeferred(ref2) # known but file missing 

1881 with self.assertRaises(FileNotFoundError): 

1882 dref2.get() 

1883 

1884 # Test again with a trusting butler. 

1885 if self.trustModeSupported: 

1886 butler._datastore.trustGetRequest = True 

1887 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1888 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1889 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1890 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1891 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT) 

1892 

1893 # When trusting we can get a deferred dataset handle that is not 

1894 # known but does exist. 

1895 dref3 = butler.getDeferred(ref3) 

1896 metric3 = dref3.get() 

1897 self.assertEqual(metric3, metric) 

1898 

1899 # Check that per-ref query gives the same answer as many query. 

1900 for ref, exists in exists_many.items(): 

1901 self.assertEqual(butler.exists(ref, full_check=True), exists) 

1902 

1903 # Create a ref that surprisingly has the UUID of an existing ref 

1904 # but is not the same. 

1905 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id) 

1906 with self.assertRaises(ValueError): 

1907 butler.exists(ref_bad) 

1908 

1909 # Create a ref that has a compatible storage class. 

1910 ref_compat = ref2.overrideStorageClass("StructuredDataDict") 

1911 exists = butler.exists(ref_compat) 

1912 self.assertEqual(exists, exists_many[ref2]) 

1913 

1914 # Remove everything and start from scratch. 

1915 butler._datastore.trustGetRequest = False 

1916 butler.pruneDatasets(refs, purge=True, unstore=True) 

1917 for ref in refs: 

1918 butler.put(metric, ref) 

1919 

1920 # These tests mess directly with the trash table and can leave the 

1921 # datastore in an odd state. Do them at the end. 

1922 # Check that in normal mode, deleting the record will lead to 

1923 # trash not touching the file. 

1924 uri1 = butler.getURI(ref1) 

1925 butler._datastore.bridge.moveToTrash( 

1926 [ref1], transaction=None 

1927 ) # Update the dataset_location table 

1928 butler._datastore.forget([ref1]) 

1929 butler._datastore.trash(ref1) 

1930 butler._datastore.emptyTrash() 

1931 self.assertTrue(uri1.exists()) 

1932 uri1.remove() # Clean it up. 

1933 

1934 # Simulate execution butler setup by deleting the datastore 

1935 # record but keeping the file around and trusting. 

1936 butler._datastore.trustGetRequest = True 

1937 uris = butler.get_many_uris([ref2, ref3]) 

1938 uri2 = uris[ref2].primaryURI 

1939 uri3 = uris[ref3].primaryURI 

1940 self.assertTrue(uri2.exists()) 

1941 self.assertTrue(uri3.exists()) 

1942 

1943 # Remove the datastore record. 

1944 butler._datastore.bridge.moveToTrash( 

1945 [ref2], transaction=None 

1946 ) # Update the dataset_location table 

1947 butler._datastore.forget([ref2]) 

1948 self.assertTrue(uri2.exists()) 

1949 butler._datastore.trash([ref2, ref3]) 

1950 # Immediate removal for ref2 file 

1951 self.assertFalse(uri2.exists()) 

1952 # But ref3 has to wait for the empty. 

1953 self.assertTrue(uri3.exists()) 

1954 butler._datastore.emptyTrash() 

1955 self.assertFalse(uri3.exists()) 

1956 

1957 # Clear out the datasets from registry. 

1958 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1959 

1960 

1961class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1962 """PosixDatastore specialization of a butler""" 

1963 

1964 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1965 fullConfigKey: str | None = ".datastore.formatters" 

1966 validationCanFail = True 

1967 datastoreStr = ["/tmp"] 

1968 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1969 registryStr = "/gen3.sqlite3" 

1970 

1971 def testPathConstructor(self) -> None: 

1972 """Independent test of constructor using PathLike.""" 

1973 butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) 

1974 self.assertIsInstance(butler, Butler) 

1975 

1976 # And again with a Path object with the butler yaml 

1977 path = pathlib.Path(self.tmpConfigFile) 

1978 butler = Butler.from_config(path, writeable=False) 

1979 self.assertIsInstance(butler, Butler) 

1980 

1981 # And again with a Path object without the butler yaml 

1982 # (making sure we skip it if the tmp config doesn't end 

1983 # in butler.yaml -- which is the case for a subclass) 

1984 if self.tmpConfigFile.endswith("butler.yaml"): 

1985 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1986 butler = Butler.from_config(path, writeable=False) 

1987 self.assertIsInstance(butler, Butler) 

1988 

1989 def testExportTransferCopy(self) -> None: 

1990 """Test local export using all transfer modes""" 

1991 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1992 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1993 # Test that the repo actually has at least one dataset. 

1994 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1995 self.assertGreater(len(datasets), 0) 

1996 uris = [exportButler.getURI(d) for d in datasets] 

1997 assert isinstance(exportButler._datastore, FileDatastore) 

1998 datastoreRoot = exportButler.get_datastore_roots()[exportButler.get_datastore_names()[0]] 

1999 

2000 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

2001 

2002 for path in pathsInStore: 

2003 # Assume local file system 

2004 assert path is not None 

2005 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

2006 

2007 for transfer in ("copy", "link", "symlink", "relsymlink"): 

2008 with safeTestTempDir(TESTDIR) as exportDir: 

2009 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

2010 export.saveDatasets(datasets) 

2011 for path in pathsInStore: 

2012 assert path is not None 

2013 self.assertTrue( 

2014 self.checkFileExists(exportDir, path), 

2015 f"Check that mode {transfer} exported files", 

2016 ) 

2017 

2018 def testPytypeCoercion(self) -> None: 

2019 """Test python type coercion on Butler.get and put.""" 

2020 # Store some data with the normal example storage class. 

2021 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

2022 datasetTypeName = "test_metric" 

2023 butler = self.runPutGetTest(storageClass, datasetTypeName) 

2024 

2025 dataId = {"instrument": "DummyCamComp", "visit": 423} 

2026 metric = butler.get(datasetTypeName, dataId=dataId) 

2027 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

2028 

2029 datasetType_ori = butler.get_dataset_type(datasetTypeName) 

2030 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

2031 

2032 # Now need to hack the registry dataset type definition. 

2033 # There is no API for this. 

2034 assert isinstance(butler._registry, SqlRegistry) 

2035 manager = butler._registry._managers.datasets 

2036 assert hasattr(manager, "_db") and hasattr(manager, "_static") 

2037 manager._db.update( 

2038 manager._static.dataset_type, 

2039 {"name": datasetTypeName}, 

2040 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

2041 ) 

2042 

2043 # Force reset of dataset type cache 

2044 butler.registry.refresh() 

2045 

2046 datasetType_new = butler.get_dataset_type(datasetTypeName) 

2047 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

2048 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

2049 

2050 metric_model = butler.get(datasetTypeName, dataId=dataId) 

2051 self.assertNotEqual(type(metric_model), type(metric)) 

2052 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

2053 

2054 # Put the model and read it back to show that everything now 

2055 # works as normal. 

2056 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

2057 metric_model_new = butler.get(metric_ref) 

2058 self.assertEqual(metric_model_new, metric_model) 

2059 

2060 # Hack the storage class again to something that will fail on the 

2061 # get with no conversion class. 

2062 manager._db.update( 

2063 manager._static.dataset_type, 

2064 {"name": datasetTypeName}, 

2065 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

2066 ) 

2067 butler.registry.refresh() 

2068 

2069 with self.assertRaises(ValueError): 

2070 butler.get(datasetTypeName, dataId=dataId) 

2071 

2072 

2073@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

2074class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

2075 """PosixDatastore specialization of a butler using Postgres""" 

2076 

2077 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2078 fullConfigKey = ".datastore.formatters" 

2079 validationCanFail = True 

2080 datastoreStr = ["/tmp"] 

2081 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

2082 registryStr = "PostgreSQL@test" 

2083 postgresql: Any 

2084 

2085 @staticmethod 

2086 def _handler(postgresql: Any) -> None: 

2087 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

2088 with engine.begin() as connection: 

2089 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

2090 

2091 @classmethod 

2092 def setUpClass(cls) -> None: 

2093 # Create the postgres test server. 

2094 cls.postgresql = testing.postgresql.PostgresqlFactory( 

2095 cache_initialized_db=True, on_initialized=cls._handler 

2096 ) 

2097 super().setUpClass() 

2098 

2099 @classmethod 

2100 def tearDownClass(cls) -> None: 

2101 # Clean up any lingering SQLAlchemy engines/connections 

2102 # so they're closed before we shut down the server. 

2103 gc.collect() 

2104 cls.postgresql.clear_cache() 

2105 super().tearDownClass() 

2106 

2107 def setUp(self) -> None: 

2108 self.server = self.postgresql() 

2109 

2110 # Need to add a registry section to the config. 

2111 self._temp_config = False 

2112 config = Config(self.configFile) 

2113 config["registry", "db"] = self.server.url() 

2114 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

2115 config.dump(fh) 

2116 self.configFile = fh.name 

2117 self._temp_config = True 

2118 super().setUp() 

2119 

2120 def tearDown(self) -> None: 

2121 self.server.stop() 

2122 if self._temp_config and os.path.exists(self.configFile): 

2123 os.remove(self.configFile) 

2124 super().tearDown() 

2125 

2126 def testMakeRepo(self) -> None: 

2127 # The base class test assumes that it's using sqlite and assumes 

2128 # the config file is acceptable to sqlite. 

2129 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

2130 

2131 

2132@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

2133class ClonedPostgresPosixDatastoreButlerTestCase(PostgresPosixDatastoreButlerTestCase, unittest.TestCase): 

2134 """Test that Butler with a Postgres registry still works after cloning.""" 

2135 

2136 def create_butler( 

2137 self, run: str, storageClass: StorageClass | str, datasetTypeName: str 

2138 ) -> tuple[DirectButler, DatasetType]: 

2139 butler, datasetType = super().create_butler(run, storageClass, datasetTypeName) 

2140 return butler._clone(run=run), datasetType 

2141 

2142 

2143class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

2144 """InMemoryDatastore specialization of a butler""" 

2145 

2146 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

2147 fullConfigKey = None 

2148 useTempRoot = False 

2149 validationCanFail = False 

2150 datastoreStr = ["datastore='InMemory"] 

2151 datastoreName = ["InMemoryDatastore@"] 

2152 registryStr = "/gen3.sqlite3" 

2153 

2154 def testIngest(self) -> None: 

2155 pass 

2156 

2157 

2158class ClonedSqliteButlerTestCase(InMemoryDatastoreButlerTestCase, unittest.TestCase): 

2159 """Test that a Butler with a Sqlite registry still works after cloning.""" 

2160 

2161 def create_butler( 

2162 self, run: str, storageClass: StorageClass | str, datasetTypeName: str 

2163 ) -> tuple[DirectButler, DatasetType]: 

2164 butler, datasetType = super().create_butler(run, storageClass, datasetTypeName) 

2165 return butler._clone(run=run), datasetType 

2166 

2167 

2168class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

2169 """PosixDatastore specialization""" 

2170 

2171 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2172 fullConfigKey = ".datastore.datastores.1.formatters" 

2173 validationCanFail = True 

2174 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

2175 datastoreName = [ 

2176 "InMemoryDatastore@", 

2177 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

2178 "SecondDatastore", 

2179 ] 

2180 registryStr = "/gen3.sqlite3" 

2181 

2182 def testPruneDatasets(self) -> None: 

2183 # This test relies on manipulating files out-of-band, which is 

2184 # impossible for this configuration because of the InMemoryDatastore in 

2185 # the ChainedDatastore. 

2186 pass 

2187 

2188 

2189class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

2190 """Test that a yaml file in one location can refer to a root in another.""" 

2191 

2192 datastoreStr = ["dir1"] 

2193 # Disable the makeRepo test since we are deliberately not using 

2194 # butler.yaml as the config name. 

2195 fullConfigKey = None 

2196 

2197 def setUp(self) -> None: 

2198 self.root = makeTestTempDir(TESTDIR) 

2199 

2200 # Make a new repository in one place 

2201 self.dir1 = os.path.join(self.root, "dir1") 

2202 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

2203 

2204 # Move the yaml file to a different place and add a "root" 

2205 self.dir2 = os.path.join(self.root, "dir2") 

2206 os.makedirs(self.dir2, exist_ok=True) 

2207 configFile1 = os.path.join(self.dir1, "butler.yaml") 

2208 config = Config(configFile1) 

2209 config["root"] = self.dir1 

2210 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

2211 config.dumpToUri(configFile2) 

2212 os.remove(configFile1) 

2213 self.tmpConfigFile = configFile2 

2214 

2215 def testFileLocations(self) -> None: 

2216 self.assertNotEqual(self.dir1, self.dir2) 

2217 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

2218 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

2219 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

2220 

2221 

2222class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

2223 """Test that a config file created by makeRepo outside of repo works.""" 

2224 

2225 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2226 

2227 def setUp(self) -> None: 

2228 self.root = makeTestTempDir(TESTDIR) 

2229 self.root2 = makeTestTempDir(TESTDIR) 

2230 

2231 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

2232 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

2233 

2234 def tearDown(self) -> None: 

2235 if os.path.exists(self.root2): 

2236 shutil.rmtree(self.root2, ignore_errors=True) 

2237 super().tearDown() 

2238 

2239 def testConfigExistence(self) -> None: 

2240 c = Config(self.tmpConfigFile) 

2241 uri_config = ResourcePath(c["root"]) 

2242 uri_expected = ResourcePath(self.root, forceDirectory=True) 

2243 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

2244 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

2245 

2246 def testPutGet(self) -> None: 

2247 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

2248 self.runPutGetTest(storageClass, "test_metric") 

2249 

2250 

2251class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

2252 """Test that a config file created by makeRepo outside of repo works.""" 

2253 

2254 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2255 

2256 def setUp(self) -> None: 

2257 self.root = makeTestTempDir(TESTDIR) 

2258 self.root2 = makeTestTempDir(TESTDIR) 

2259 

2260 self.tmpConfigFile = self.root2 

2261 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

2262 

2263 def testConfigExistence(self) -> None: 

2264 # Append the yaml file else Config constructor does not know the file 

2265 # type. 

2266 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

2267 super().testConfigExistence() 

2268 

2269 

2270class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

2271 """Test that a config file created by makeRepo outside of repo works.""" 

2272 

2273 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2274 

2275 def setUp(self) -> None: 

2276 self.root = makeTestTempDir(TESTDIR) 

2277 self.root2 = makeTestTempDir(TESTDIR) 

2278 

2279 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

2280 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

2281 

2282 

2283@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

2284class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

2285 """S3Datastore specialization of a butler; an S3 storage Datastore + 

2286 a local in-memory SqlRegistry. 

2287 """ 

2288 

2289 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

2290 fullConfigKey = None 

2291 validationCanFail = True 

2292 

2293 bucketName = "anybucketname" 

2294 """Name of the Bucket that will be used in the tests. The name is read from 

2295 the config file used with the tests during set-up. 

2296 """ 

2297 

2298 root = "butlerRoot/" 

2299 """Root repository directory expected to be used in case useTempRoot=False. 

2300 Otherwise the root is set to a 20 characters long randomly generated string 

2301 during set-up. 

2302 """ 

2303 

2304 datastoreStr = [f"datastore={root}"] 

2305 """Contains all expected root locations in a format expected to be 

2306 returned by Butler stringification. 

2307 """ 

2308 

2309 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

2310 """The expected format of the S3 Datastore string.""" 

2311 

2312 registryStr = "/gen3.sqlite3" 

2313 """Expected format of the Registry string.""" 

2314 

2315 mock_aws = mock_aws() 

2316 """The mocked s3 interface from moto.""" 

2317 

2318 def genRoot(self) -> str: 

2319 """Return a random string of len 20 to serve as a root 

2320 name for the temporary bucket repo. 

2321 

2322 This is equivalent to tempfile.mkdtemp as this is what self.root 

2323 becomes when useTempRoot is True. 

2324 """ 

2325 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

2326 return rndstr + "/" 

2327 

2328 def setUp(self) -> None: 

2329 config = Config(self.configFile) 

2330 uri = ResourcePath(config[".datastore.datastore.root"]) 

2331 self.bucketName = uri.netloc 

2332 

2333 # Enable S3 mocking of tests. 

2334 self.enterContext(clean_test_environment_for_s3()) 

2335 self.mock_aws.start() 

2336 

2337 if self.useTempRoot: 

2338 self.root = self.genRoot() 

2339 rooturi = f"s3://{self.bucketName}/{self.root}" 

2340 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

2341 

2342 # need local folder to store registry database 

2343 self.reg_dir = makeTestTempDir(TESTDIR) 

2344 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

2345 

2346 # MOTO needs to know that we expect Bucket bucketname to exist 

2347 # (this used to be the class attribute bucketName) 

2348 s3 = boto3.resource("s3") 

2349 s3.create_bucket(Bucket=self.bucketName) 

2350 

2351 self.datastoreStr = [f"datastore='{rooturi}'"] 

2352 self.datastoreName = [f"FileDatastore@{rooturi}"] 

2353 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

2354 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

2355 

2356 def tearDown(self) -> None: 

2357 s3 = boto3.resource("s3") 

2358 bucket = s3.Bucket(self.bucketName) 

2359 try: 

2360 bucket.objects.all().delete() 

2361 except botocore.exceptions.ClientError as e: 

2362 if e.response["Error"]["Code"] == "404": 

2363 # the key was not reachable - pass 

2364 pass 

2365 else: 

2366 raise 

2367 

2368 bucket = s3.Bucket(self.bucketName) 

2369 bucket.delete() 

2370 

2371 # Stop the S3 mock. 

2372 self.mock_aws.stop() 

2373 

2374 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

2375 shutil.rmtree(self.reg_dir, ignore_errors=True) 

2376 

2377 if self.useTempRoot and os.path.exists(self.root): 

2378 shutil.rmtree(self.root, ignore_errors=True) 

2379 

2380 super().tearDown() 

2381 

2382 

2383class PosixDatastoreTransfers(unittest.TestCase): 

2384 """Test data transfers between butlers. 

2385 

2386 Test for different managers. UUID to UUID and integer to integer are 

2387 tested. UUID to integer is not supported since we do not currently 

2388 want to allow that. Integer to UUID is supported with the caveat 

2389 that UUID4 will be generated and this will be incorrect for raw 

2390 dataset types. The test ignores that. 

2391 """ 

2392 

2393 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2394 storageClassFactory: StorageClassFactory 

2395 

2396 @classmethod 

2397 def setUpClass(cls) -> None: 

2398 cls.storageClassFactory = StorageClassFactory() 

2399 cls.storageClassFactory.addFromConfig(cls.configFile) 

2400 

2401 def setUp(self) -> None: 

2402 self.root = makeTestTempDir(TESTDIR) 

2403 self.config = Config(self.configFile) 

2404 

2405 def tearDown(self) -> None: 

2406 removeTestTempDir(self.root) 

2407 

2408 def create_butler(self, manager: str, label: str) -> Butler: 

2409 config = Config(self.configFile) 

2410 config["registry", "managers", "datasets"] = manager 

2411 return Butler.from_config( 

2412 Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True 

2413 ) 

2414 

2415 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None: 

2416 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID" 

2417 if manager1 is None: 

2418 manager1 = default 

2419 if manager2 is None: 

2420 manager2 = default 

2421 self.source_butler = self.create_butler(manager1, "1") 

2422 self.target_butler = self.create_butler(manager2, "2") 

2423 

2424 def testTransferUuidToUuid(self) -> None: 

2425 self.create_butlers() 

2426 self.assertButlerTransfers() 

2427 

2428 def testTransferMissing(self) -> None: 

2429 """Test transfers where datastore records are missing. 

2430 

2431 This is how execution butler works. 

2432 """ 

2433 self.create_butlers() 

2434 

2435 # Configure the source butler to allow trust. 

2436 self.source_butler._datastore._set_trust_mode(True) 

2437 

2438 self.assertButlerTransfers(purge=True) 

2439 

2440 def testTransferMissingDisassembly(self) -> None: 

2441 """Test transfers where datastore records are missing. 

2442 

2443 This is how execution butler works. 

2444 """ 

2445 self.create_butlers() 

2446 

2447 # Configure the source butler to allow trust. 

2448 self.source_butler._datastore._set_trust_mode(True) 

2449 

2450 # Test disassembly. 

2451 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2452 

2453 def testAbsoluteURITransferDirect(self) -> None: 

2454 """Test transfer using an absolute URI.""" 

2455 self._absolute_transfer("auto") 

2456 

2457 def testAbsoluteURITransferCopy(self) -> None: 

2458 """Test transfer using an absolute URI.""" 

2459 self._absolute_transfer("copy") 

2460 

2461 def _absolute_transfer(self, transfer: str) -> None: 

2462 self.create_butlers() 

2463 

2464 storageClassName = "StructuredData" 

2465 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2466 datasetTypeName = "random_data" 

2467 run = "run1" 

2468 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2469 

2470 dimensions = self.source_butler.dimensions.conform(()) 

2471 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2472 self.source_butler.registry.registerDatasetType(datasetType) 

2473 

2474 metrics = makeExampleMetrics() 

2475 with ResourcePath.temporary_uri(suffix=".json") as temp: 

2476 dataId = DataCoordinate.make_empty(self.source_butler.dimensions) 

2477 source_refs = [DatasetRef(datasetType, dataId, run=run)] 

2478 temp.write(json.dumps(metrics.exportAsDict()).encode()) 

2479 dataset = FileDataset(path=temp, refs=source_refs) 

2480 self.source_butler.ingest(dataset, transfer="direct") 

2481 

2482 self.target_butler.transfer_from( 

2483 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer 

2484 ) 

2485 

2486 uri = self.target_butler.getURI(dataset.refs[0]) 

2487 if transfer == "auto": 

2488 self.assertEqual(uri, temp) 

2489 else: 

2490 self.assertNotEqual(uri, temp) 

2491 

2492 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None: 

2493 """Test that a run can be transferred to another butler.""" 

2494 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2495 datasetTypeName = "random_data" 

2496 

2497 # Test will create 3 collections and we will want to transfer 

2498 # two of those three. 

2499 runs = ["run1", "run2", "other"] 

2500 

2501 # Also want to use two different dataset types to ensure that 

2502 # grouping works. 

2503 datasetTypeNames = ["random_data", "random_data_2"] 

2504 

2505 # Create the run collections in the source butler. 

2506 for run in runs: 

2507 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2508 

2509 # Create dimensions in source butler. 

2510 n_exposures = 30 

2511 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2512 self.source_butler.registry.insertDimensionData( 

2513 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2514 ) 

2515 self.source_butler.registry.insertDimensionData( 

2516 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2517 ) 

2518 self.source_butler.registry.insertDimensionData( 

2519 "day_obs", 

2520 { 

2521 "instrument": "DummyCamComp", 

2522 "id": 20250101, 

2523 }, 

2524 ) 

2525 

2526 for i in range(n_exposures): 

2527 self.source_butler.registry.insertDimensionData( 

2528 "group", {"instrument": "DummyCamComp", "name": f"group{i}"} 

2529 ) 

2530 self.source_butler.registry.insertDimensionData( 

2531 "exposure", 

2532 { 

2533 "instrument": "DummyCamComp", 

2534 "id": i, 

2535 "obs_id": f"exp{i}", 

2536 "physical_filter": "d-r", 

2537 "group": f"group{i}", 

2538 "day_obs": 20250101, 

2539 }, 

2540 ) 

2541 

2542 # Create dataset types in the source butler. 

2543 dimensions = self.source_butler.dimensions.conform(["instrument", "exposure"]) 

2544 for datasetTypeName in datasetTypeNames: 

2545 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2546 self.source_butler.registry.registerDatasetType(datasetType) 

2547 

2548 # Write a dataset to an unrelated run -- this will ensure that 

2549 # we are rewriting integer dataset ids in the target if necessary. 

2550 # Will not be relevant for UUID. 

2551 run = "distraction" 

2552 butler = Butler.from_config(butler=self.source_butler, run=run) 

2553 butler.put( 

2554 makeExampleMetrics(), 

2555 datasetTypeName, 

2556 exposure=1, 

2557 instrument="DummyCamComp", 

2558 physical_filter="d-r", 

2559 ) 

2560 

2561 # Write some example metrics to the source 

2562 butler = Butler.from_config(butler=self.source_butler) 

2563 

2564 # Set of DatasetRefs that should be in the list of refs to transfer 

2565 # but which will not be transferred. 

2566 deleted: set[DatasetRef] = set() 

2567 

2568 n_expected = 20 # Number of datasets expected to be transferred 

2569 source_refs = [] 

2570 for i in range(n_exposures): 

2571 # Put a third of datasets into each collection, only retain 

2572 # two thirds. 

2573 index = i % 3 

2574 run = runs[index] 

2575 datasetTypeName = datasetTypeNames[i % 2] 

2576 

2577 metric = MetricsExample( 

2578 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)] 

2579 ) 

2580 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2581 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2582 

2583 # Remove the datastore record using low-level API, but only 

2584 # for a specific index. 

2585 if purge and index == 1: 

2586 # For one of these delete the file as well. 

2587 # This allows the "missing" code to filter the 

2588 # file out. 

2589 # Access the individual datastores. 

2590 datastores = [] 

2591 if hasattr(butler._datastore, "datastores"): 

2592 datastores.extend(butler._datastore.datastores) 

2593 else: 

2594 datastores.append(butler._datastore) 

2595 

2596 if not deleted: 

2597 # For a chained datastore we need to remove 

2598 # files in each chain. 

2599 for datastore in datastores: 

2600 # The file might not be known to the datastore 

2601 # if constraints are used. 

2602 try: 

2603 primary, uris = datastore.getURIs(ref) 

2604 except FileNotFoundError: 

2605 continue 

2606 if primary and primary.scheme != "mem": 

2607 primary.remove() 

2608 for uri in uris.values(): 

2609 if uri.scheme != "mem": 

2610 uri.remove() 

2611 n_expected -= 1 

2612 deleted.add(ref) 

2613 

2614 # Remove the datastore record. 

2615 for datastore in datastores: 

2616 if hasattr(datastore, "removeStoredItemInfo"): 

2617 datastore.removeStoredItemInfo(ref) 

2618 

2619 if index < 2: 

2620 source_refs.append(ref) 

2621 if ref not in deleted: 

2622 new_metric = butler.get(ref) 

2623 self.assertEqual(new_metric, metric) 

2624 

2625 # Create some bad dataset types to ensure we check for inconsistent 

2626 # definitions. 

2627 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2628 for datasetTypeName in datasetTypeNames: 

2629 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2630 self.target_butler.registry.registerDatasetType(datasetType) 

2631 with self.assertRaises(ConflictingDefinitionError) as cm: 

2632 self.target_butler.transfer_from(self.source_butler, source_refs) 

2633 self.assertIn("dataset type differs", str(cm.exception)) 

2634 

2635 # And remove the bad definitions. 

2636 for datasetTypeName in datasetTypeNames: 

2637 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2638 

2639 # Transfer without creating dataset types should fail. 

2640 with self.assertRaises(KeyError): 

2641 self.target_butler.transfer_from(self.source_butler, source_refs) 

2642 

2643 # Transfer without creating dimensions should fail. 

2644 with self.assertRaises(ConflictingDefinitionError) as cm: 

2645 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True) 

2646 self.assertIn("dimension", str(cm.exception)) 

2647 

2648 # The failed transfer above leaves registry in an inconsistent 

2649 # state because the run is created but then rolled back without 

2650 # the collection cache being cleared. For now force a refresh. 

2651 # Can remove with DM-35498. 

2652 self.target_butler.registry.refresh() 

2653 

2654 # Do a dry run -- this should not have any effect on the target butler. 

2655 self.target_butler.transfer_from(self.source_butler, source_refs, dry_run=True) 

2656 

2657 # Transfer the records for one ref to test the alternative API. 

2658 with self.assertLogs(logger="lsst", level=logging.DEBUG) as log_cm: 

2659 self.target_butler.transfer_dimension_records_from(self.source_butler, [source_refs[0]]) 

2660 self.assertIn("number of records transferred: 1", ";".join(log_cm.output)) 

2661 

2662 # Now transfer them to the second butler, including dimensions. 

2663 with self.assertLogs(logger="lsst", level=logging.DEBUG) as log_cm: 

2664 transferred = self.target_butler.transfer_from( 

2665 self.source_butler, 

2666 source_refs, 

2667 register_dataset_types=True, 

2668 transfer_dimensions=True, 

2669 ) 

2670 self.assertEqual(len(transferred), n_expected) 

2671 log_output = ";".join(log_cm.output) 

2672 

2673 # A ChainedDatastore will use the in-memory datastore for mexists 

2674 # so we can not rely on the mexists log message. 

2675 self.assertIn("Number of datastore records found in source", log_output) 

2676 self.assertIn("Creating output run", log_output) 

2677 

2678 # Do the transfer twice to ensure that it will do nothing extra. 

2679 # Only do this if purge=True because it does not work for int 

2680 # dataset_id. 

2681 if purge: 

2682 # This should not need to register dataset types. 

2683 transferred = self.target_butler.transfer_from(self.source_butler, source_refs) 

2684 self.assertEqual(len(transferred), n_expected) 

2685 

2686 # Also do an explicit low-level transfer to trigger some 

2687 # edge cases. 

2688 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2689 self.target_butler._datastore.transfer_from(self.source_butler._datastore, source_refs) 

2690 log_output = ";".join(log_cm.output) 

2691 self.assertIn("no file artifacts exist", log_output) 

2692 

2693 with self.assertRaises((TypeError, AttributeError)): 

2694 self.target_butler._datastore.transfer_from(self.source_butler, source_refs) # type: ignore 

2695 

2696 with self.assertRaises(ValueError): 

2697 self.target_butler._datastore.transfer_from( 

2698 self.source_butler._datastore, source_refs, transfer="split" 

2699 ) 

2700 

2701 # Now try to get the same refs from the new butler. 

2702 for ref in source_refs: 

2703 if ref not in deleted: 

2704 new_metric = self.target_butler.get(ref) 

2705 old_metric = self.source_butler.get(ref) 

2706 self.assertEqual(new_metric, old_metric) 

2707 

2708 # Now prune run2 collection and create instead a CHAINED collection. 

2709 # This should block the transfer. 

2710 self.target_butler.removeRuns(["run2"], unstore=True) 

2711 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2712 with self.assertRaises(CollectionTypeError): 

2713 # Re-importing the run1 datasets can be problematic if they 

2714 # use integer IDs so filter those out. 

2715 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2716 self.target_butler.transfer_from(self.source_butler, to_transfer) 

2717 

2718 

2719class ChainedDatastoreTransfers(PosixDatastoreTransfers): 

2720 """Test transfers using a chained datastore.""" 

2721 

2722 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2723 

2724 

2725class NullDatastoreTestCase(unittest.TestCase): 

2726 """Test that we can fall back to a null datastore.""" 

2727 

2728 # Need a good config to create the repo. 

2729 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2730 storageClassFactory: StorageClassFactory 

2731 

2732 @classmethod 

2733 def setUpClass(cls) -> None: 

2734 cls.storageClassFactory = StorageClassFactory() 

2735 cls.storageClassFactory.addFromConfig(cls.configFile) 

2736 

2737 def setUp(self) -> None: 

2738 """Create a new butler root for each test.""" 

2739 self.root = makeTestTempDir(TESTDIR) 

2740 Butler.makeRepo(self.root, config=Config(self.configFile)) 

2741 

2742 def tearDown(self) -> None: 

2743 removeTestTempDir(self.root) 

2744 

2745 def test_fallback(self) -> None: 

2746 # Read the butler config and mess with the datastore section. 

2747 config_path = os.path.join(self.root, "butler.yaml") 

2748 bad_config = Config(config_path) 

2749 bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore" 

2750 bad_config.dumpToUri(config_path) 

2751 

2752 with self.assertRaises(RuntimeError): 

2753 Butler(self.root, without_datastore=False) 

2754 

2755 with self.assertRaises(RuntimeError): 

2756 Butler.from_config(self.root, without_datastore=False) 

2757 

2758 butler = Butler.from_config(self.root, writeable=True, without_datastore=True) 

2759 self.assertIsInstance(butler._datastore, NullDatastore) 

2760 

2761 # Check that registry is working. 

2762 butler.registry.registerRun("MYRUN") 

2763 collections = butler.registry.queryCollections(...) 

2764 self.assertIn("MYRUN", set(collections)) 

2765 

2766 # Create a ref. 

2767 dimensions = butler.dimensions.conform([]) 

2768 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

2769 datasetTypeName = "metric" 

2770 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2771 butler.registry.registerDatasetType(datasetType) 

2772 ref = DatasetRef(datasetType, {}, run="MYRUN") 

2773 

2774 # Check that datastore will complain. 

2775 with self.assertRaises(FileNotFoundError): 

2776 butler.get(ref) 

2777 with self.assertRaises(FileNotFoundError): 

2778 butler.getURI(ref) 

2779 

2780 

2781@unittest.skipIf(create_test_server is None, "Server dependencies not installed.") 

2782class ButlerServerTests(FileDatastoreButlerTests, unittest.TestCase): 

2783 """Test RemoteButler and Butler server.""" 

2784 

2785 configFile = None 

2786 predictionSupported = False 

2787 trustModeSupported = False 

2788 

2789 def setUp(self): 

2790 self.server_instance = self.enterContext(create_test_server(TESTDIR)) 

2791 

2792 def tearDown(self): 

2793 pass 

2794 

2795 def are_uris_equivalent(self, uri1: ResourcePath, uri2: ResourcePath) -> bool: 

2796 # S3 pre-signed URLs may end up with differing expiration times in the 

2797 # query parameters, so ignore query parameters when comparing. 

2798 return uri1.scheme == uri2.scheme and uri1.netloc == uri2.netloc and uri1.path == uri2.path 

2799 

2800 def create_empty_butler(self, run: str | None = None, writeable: bool | None = None) -> Butler: 

2801 return self.server_instance.hybrid_butler._clone(run=run) 

2802 

2803 def remove_dataset_out_of_band(self, butler: Butler, ref: DatasetRef) -> None: 

2804 # Can't delete a file via S3 signed URLs, so we need to reach in 

2805 # through DirectButler to delete the dataset. 

2806 uri = self.server_instance.direct_butler.getURI(ref) 

2807 uri.remove() 

2808 

2809 def testConstructor(self): 

2810 # RemoteButler constructor is tested in test_server.py and 

2811 # test_remote_butler.py. 

2812 pass 

2813 

2814 def testDafButlerRepositories(self): 

2815 # Loading of RemoteButler via repository index is tested in 

2816 # test_server.py. 

2817 pass 

2818 

2819 def testGetDatasetTypes(self) -> None: 

2820 # This is mostly a test of validateConfiguration, which is for 

2821 # validating Datastore configuration and thus isn't relevant to 

2822 # RemoteButler. 

2823 pass 

2824 

2825 def testMakeRepo(self) -> None: 

2826 # Only applies to DirectButler. 

2827 pass 

2828 

2829 # Pickling not yet implemented for RemoteButler/HybridButler. 

2830 @unittest.expectedFailure 

2831 def testPickle(self) -> None: 

2832 return super().testPickle() 

2833 

2834 def testStringification(self) -> None: 

2835 self.assertEqual( 

2836 str(self.server_instance.remote_butler), 

2837 "RemoteButler(https://test.example/api/butler/repo/testrepo)", 

2838 ) 

2839 

2840 def testTransaction(self) -> None: 

2841 # Transactions will never be supported for RemoteButler. 

2842 pass 

2843 

2844 def testPutTemplates(self) -> None: 

2845 # The Butler server instance is configured with different file naming 

2846 # templates than this test is expecting. 

2847 pass 

2848 

2849 

2850def setup_module(module: types.ModuleType) -> None: 

2851 """Set up the module for pytest.""" 

2852 clean_environment() 

2853 

2854 

2855if __name__ == "__main__": 

2856 clean_environment() 

2857 unittest.main()