# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""
from __future__ import annotations

import gc
import json
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock
import uuid
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, cast

try:
    import boto3
    import botocore
    from lsst.resources.s3utils import clean_test_environment_for_s3
    from moto import mock_s3  # type: ignore[import]
except ImportError:
    boto3 = None

    def mock_s3(*args: Any, **kwargs: Any) -> Any:  # type: ignore[no-untyped-def]
        """No-op decorator in case moto mock_s3 cannot be imported."""
        return None
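    # With boto3 absent this stand-in replaces the real decorator; the
    # S3-specific tests are assumed to check ``boto3 is None`` and skip
    # themselves rather than rely on this no-op.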


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    ButlerRepoIndex,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetExistence,
    DatasetRef,
    DatasetType,
    FileDataset,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.datastore import NullDatastore
from lsst.daf.butler.datastore.file_templates import FileTemplate, FileTemplateValidationError
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.direct_butler import DirectButler
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.registry.sql_registry import SqlRegistry
from lsst.daf.butler.repo_relocation import BUTLER_ROOT_TAG
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    import types

    from lsst.daf.butler import Datastore, DimensionGroup, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def clean_environment() -> None:
    """Remove external environment variables that affect the tests."""
    for k in ("DAF_BUTLER_REPOSITORY_INDEX",):
        os.environ.pop(k, None)


def makeExampleMetrics() -> MetricsExample:
    """Return example dataset suitable for tests."""

    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in any other test
    cases.
    """

    def testSearchPath(self) -> None:
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests(TestCaseMixin):
    """Helper methods for running a suite of put/get tests from different
    butler configurations.
    """

    root: str
    default_run = "ingésτ😺"
    storageClassFactory: StorageClassFactory
    configFile: str
    tmpConfigFile: str

    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGroup, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(
        self,
        butler: Butler,
        datasetRef: DatasetRef,
        components: tuple[str, ...],
        reference: Any,
        collections: Any = None,
    ) -> None:
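        """Check that each named component of ``datasetRef`` can be retrieved,
        both directly and through a deferred handle, and that the values match
        the corresponding attributes of ``reference``.
        """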

        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[DirectButler, DatasetType]:
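        """Create a butler for ``run``, register ``datasetTypeName`` with the
        instrument+visit dimensions, and insert the dimension records needed
        by the put/get tests.
        """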

        butler = Butler.from_config(self.tmpConfigFile, run=run)
        assert isinstance(butler, DirectButler), "Expect DirectButler in configuration"

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run})

        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> DirectButler:
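        """Exercise put/get round trips, component retrieval, artifact
        retrieval and dataset removal for the given storage class, returning
        the butler so callers can run further checks.
        """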

        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]
        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                kwargs: dict[str, Any] = {}
                if not isinstance(args[0], DatasetRef):  # type: ignore
                    kwargs["run"] = this_run
                ref = butler.put(metric, *args, **kwargs)
                self.assertIsInstance(ref, DatasetRef)

                # Test get of a ref.
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            primary_uri, secondary_uris = butler.getURIs(ref)
            n_uris = len(secondary_uris)
            if primary_uri:
                n_uris += 1

            # Can the artifacts themselves be retrieved?
            if not butler._datastore.isEphemeral:
                # Create a temporary directory to hold the retrieved
                # artifacts.
                with tempfile.TemporaryDirectory(
                    prefix="butler-artifacts-", ignore_cleanup_errors=True
                ) as artifact_root:
                    root_uri = ResourcePath(artifact_root, forceDirectory=True)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"{preserve_path}_{counter}/")
                        log = logging.getLogger("lsst.x")
                        log.warning("Using destination %s for args %s", destination, args)
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs
                        # would use hard links and subsequent transfer
                        # would work because it knows they are the same
                        # file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)
                            assert path_in_destination is not None

                            # When path is not preserved there should not
                            # be any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True)
            # Lookup with original args should still fail.
            kwargs = {"collections": this_run}
            if isinstance(args[0], DatasetRef):
                kwargs = {}  # Prevent warning from being issued.
            self.assertFalse(butler.exists(*args, **kwargs))
            # get() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.get(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.get_dataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Create DatasetRef for put using default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Check that getDeferred fails with standalone ref.
        with self.assertRaises(LookupError):
            butler.getDeferred(refIn)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        assert metric.data is not None  # for mypy
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.find_dataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises(FileNotFoundError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add the same dataset again, so we can check that a duplicate put
        # fails.
        ref = butler.put(metric, datasetType, dataId)

        # Repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # In case of resolved ref this write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler

    def testDeferredCollectionPassing(self) -> None:
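        """Test that a butler constructed without a default run can still put
        and retrieve datasets when collections are passed explicitly.
        """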

        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler.from_config(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises a
        # CollectionError.
        self.assertFalse(butler.exists(datasetType, dataId))
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True
    validationCanFail: bool
    fullConfigKey: str | None
    registryStr: str | None
    datastoreName: list[str] | None
    datastoreStr: list[str]

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self) -> None:
        """Independent test of constructor."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler.from_config(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler.from_config(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler.from_config(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler.from_config(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler._datastore, butler2._datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"file://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler.from_config(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler.from_config("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler.from_config("not_there", writeable=False)
                    with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"):
                        Butler.from_config("bad_label")
                    with self.assertRaises(FileNotFoundError):
                        # Should ignore aliases.
                        Butler.from_config(ResourcePath("label", forceAbsolute=False))
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(
                        Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False)
                    )
                    self.assertIn("not known to", str(cm.exception))
                    # Should report no failure.
                    self.assertEqual(ButlerRepoIndex.get_failure_reason(), "")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with empty configuration.
                butler_index = Config()
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"):
                        Butler.from_config("label")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with bad contents.
                with open(temp_file.ospath, "w") as fh:
                    print("'", file=fh)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"):
                        Butler.from_config("label")
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())

            with self.assertRaisesRegex(FileNotFoundError, "index file not found"):
                Butler.from_config("label")

        # Check that we can create Butler when the alias file is not found.
        butler = Butler.from_config(self.tmpConfigFile, writeable=False)
        self.assertIsInstance(butler, Butler)
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"):
            # No aliases registered.
            Butler.from_config("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler._datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler._datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.get_dataset_type(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the compatible but different dataset type
        # definition, this time passing the dataset type and dataId rather
        # than a ref. The behavior should be consistent and return the
        # python type of the given DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self) -> None:
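        """Test ingesting external files, both one file per dataset and
        multiple datasets per file, with different transfer modes.
        """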

        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            # For first ingest use copy.
            butler.ingest(*datasets, transfer="copy", record_validation_info=False)

            # Now try to ingest again in "execution butler" mode where
            # the registry entries exist but the datastore does not have
            # the files. We also need to strip the dimension records to ensure
            # that they will be re-added by the ingest.
            ref = datasets[0].refs[0]
            datasets[0].refs = [
                cast(
                    DatasetRef,
                    butler.find_dataset(ref.datasetType, data_id=ref.dataId, collections=ref.run),
                )
                for ref in datasets[0].refs
            ]
            all_refs = []
            for dataset in datasets:
                refs = []
                for ref in dataset.refs:
                    # Create a dict from the dataId to drop the records.
                    new_data_id = dict(ref.dataId.required)
                    new_ref = butler.find_dataset(ref.datasetType, new_data_id, collections=ref.run)
                    assert new_ref is not None
                    self.assertFalse(new_ref.dataId.hasRecords())
                    refs.append(new_ref)
                dataset.refs = refs
                all_refs.extend(dataset.refs)
            butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False)

            # Use move mode to test that the file is deleted. Also
            # disable recording of file size.
            butler.ingest(*datasets, transfer="move", record_validation_info=False)

            # Check that every ref now has records.
            for dataset in datasets:
                for ref in dataset.refs:
                    self.assertTrue(ref.dataId.hasRecords())

            # Ensure that the file has disappeared.
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler._datastore.getStoredItemsInfo(datasets[0].refs[0])  # type: ignore[attr-defined]
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.exists(datasetTypeName, dataId1))
        self.assertTrue(butler.exists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets
        datasets = []
        butler.ingest(*datasets)

    def testPickle(self) -> None:
        """Test pickle support."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        assert isinstance(butler, DirectButler), "Expect DirectButler in configuration"
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self) -> None:
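        """Test that registered dataset types (and their components) can be
        queried and that the configuration validates against them.
        """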

        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.dimensions.conform(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry, entries are not created
        # for components, but querying can still return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self) -> None:
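        """Test that dimension inserts and puts made inside a failing
        transaction are fully rolled back.
        """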

        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get of a ref.
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.find_dataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)

    def testMakeRepo(self) -> None:
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler.from_config(butlerConfig)
        assert isinstance(butler1, DirectButler), "Expect DirectButler in configuration"
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler.from_config(butlerConfig)
        assert isinstance(butler2, DirectButler), "Expect DirectButler in configuration"
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler.from_config(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self) -> None:
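        """Test that the butler's string form contains the expected datastore
        and registry identifiers.
        """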

        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler._datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self) -> None:
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.dimensions.conform(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)

    def testGetDatasetCollectionCaching(self) -> None:
        # Prior to DM-41117, there was a bug where get_dataset would throw
        # MissingCollectionError if you tried to fetch a dataset that was added
        # after the collection cache was last updated.
        reader_butler, datasetType = self.create_butler(self.default_run, "int", "datasettypename")
        writer_butler = Butler.from_config(self.tmpConfigFile, writeable=True, run="new_run")
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        put_ref = writer_butler.put(123, datasetType, dataId)
        get_ref = reader_butler.get_dataset(put_ref.id)
        assert get_ref is not None
        self.assertEqual(get_ref.id, put_ref.id)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool:
        """Check if a file exists at a given path (relative to ``root``).

        The test ``testPutTemplates`` verifies the actual physical existence
        of the files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self) -> None:
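        """Test that file templates control where datasets are written, and
        that templates that cannot produce unique filenames are rejected.
        """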

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template), plus a third whose template is exercised separately below.
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

1324 

1325 template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits") 

1326 with self.assertRaises(KeyError): 

1327 with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"): 

1328 template.format(ref) 

1329 

1330 # Now use a file template that will not result in unique filenames 

1331 with self.assertRaises(FileTemplateValidationError): 

1332 butler.put(metric, "metric3", dataId1) 

1333 
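# A minimal sketch of the FileTemplate behaviour exercised above, assuming a
# ``ref`` with dimension records attached (such as the one returned by
# butler.put in testPutTemplates). An optional field ("{...:?}") whose record
# attribute is missing is dropped from the formatted path; the same field
# without ":?" makes format() raise KeyError instead.
def _sketch_optional_template_field(ref: DatasetRef) -> str:
    template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
    return template.format(ref)  # e.g. f"a/v423/{ref.id}_fits"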

1334 def testImportExport(self) -> None: 

1335 # Run put/get tests just to create and populate a repo. 

1336 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1337 self.runImportExportTest(storageClass) 

1338 

1339 @unittest.expectedFailure 

1340 def testImportExportVirtualComposite(self) -> None: 

1341 # Run put/get tests just to create and populate a repo. 

1342 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

1343 self.runImportExportTest(storageClass) 

1344 

1345 def runImportExportTest(self, storageClass: StorageClass) -> None: 

1346 """Test exporting and importing. 

1347 

1348 This test does an export to a temp directory and an import back 

1349 into a new temp directory repo. It does not assume a posix datastore. 

1350 """ 

1351 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1352 

1353 # Test that we must have a file extension. 

1354 with self.assertRaises(ValueError): 

1355 with exportButler.export(filename="dump", directory=".") as export: 

1356 pass 

1357 

1358 # Test that unknown format is not allowed. 

1359 with self.assertRaises(ValueError): 

1360 with exportButler.export(filename="dump.fits", directory=".") as export: 

1361 pass 

1362 

1363 # Test that the repo actually has at least one dataset. 

1364 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1365 self.assertGreater(len(datasets), 0) 

1366 # Add a DimensionRecord that's unused by those datasets. 

1367 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")} 

1368 exportButler.registry.insertDimensionData("skymap", skymapRecord) 

1369 # Export and then import datasets. 

1370 with safeTestTempDir(TESTDIR) as exportDir: 

1371 exportFile = os.path.join(exportDir, "exports.yaml") 

1372 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export: 

1373 export.saveDatasets(datasets) 

1374 # Export the same datasets again. This should quietly do 

1375 # nothing because of internal deduplication, and it shouldn't 

1376 # complain about being asked to export the "htm7" elements even 

1377 # though there aren't any in these datasets or in the database. 

1378 export.saveDatasets(datasets, elements=["htm7"]) 

1379 # Save one of the data IDs again; this should be harmless 

1380 # because of internal deduplication. 

1381 export.saveDataIds([datasets[0].dataId]) 

1382 # Save some dimension records directly. 

1383 export.saveDimensionData("skymap", [skymapRecord]) 

1384 self.assertTrue(os.path.exists(exportFile)) 

1385 with safeTestTempDir(TESTDIR) as importDir: 

1386 # We always want this to be a local posix butler 

1387 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml"))) 

1388 # Calling script.butlerImport tests the implementation of the 

1389 # butler command line interface "import" subcommand. Functions 

1390 # in the script folder are generally considered protected and 

1391 # should not be used as public api. 

1392 with open(exportFile) as f: 

1393 script.butlerImport( 

1394 importDir, 

1395 export_file=f, 

1396 directory=exportDir, 

1397 transfer="auto", 

1398 skip_dimensions=None, 

1399 ) 

1400 importButler = Butler.from_config(importDir, run=self.default_run) 

1401 for ref in datasets: 

1402 with self.subTest(ref=ref): 

1403 # Test for existence by passing in the DatasetType and 

1404 # data ID separately, to avoid lookup by dataset_id. 

1405 self.assertTrue(importButler.exists(ref.datasetType, ref.dataId)) 

1406 self.assertEqual( 

1407 list(importButler.registry.queryDimensionRecords("skymap")), 

1408 [importButler.dimensions["skymap"].RecordClass(**skymapRecord)], 

1409 ) 

1410 
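# A minimal sketch of the export/import round trip tested above. It assumes
# ``dst_root`` already contains a butler repository (e.g. created with
# Butler.makeRepo); the export file name is illustrative only.
def _sketch_export_import(src: Butler, dst_root: str, refs: list[DatasetRef]) -> None:
    export_file = os.path.join(dst_root, "exports.yaml")
    with src.export(filename=export_file, directory=dst_root, transfer="auto") as export:
        export.saveDatasets(refs)
    dst = Butler.from_config(dst_root, writeable=True)
    dst.import_(filename=export_file, directory=dst_root, transfer="auto")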

1411 def testRemoveRuns(self) -> None: 

1412 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1413 butler = Butler.from_config(self.tmpConfigFile, writeable=True) 

1414 # Load registry data with dimensions to hang datasets off of. 

1415 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1416 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1417 # Add some RUN-type collections. 

1418 run1 = "run1" 

1419 butler.registry.registerRun(run1) 

1420 run2 = "run2" 

1421 butler.registry.registerRun(run2) 

1422 # Put a dataset in each. 

1423 metric = makeExampleMetrics() 

1424 dimensions = butler.dimensions.conform(["instrument", "physical_filter"]) 

1425 datasetType = self.addDatasetType( 

1426 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1427 ) 

1428 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1429 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1430 uri1 = butler.getURI(ref1) 

1431 uri2 = butler.getURI(ref2) 

1432 

1433 with self.assertRaises(OrphanedRecordError): 

1434 butler.registry.removeDatasetType(datasetType.name) 

1435 

1436 # Remove from both runs with different values for unstore. 

1437 butler.removeRuns([run1], unstore=True) 

1438 butler.removeRuns([run2], unstore=False) 

1439 # Should be nothing in registry for either one, and datastore should 

1440 # not think either exists. 

1441 with self.assertRaises(MissingCollectionError): 

1442 butler.registry.getCollectionType(run1) 

1443 with self.assertRaises(MissingCollectionError): 

1444 butler.registry.getCollectionType(run2) 

1445 self.assertFalse(butler.stored(ref1)) 

1446 self.assertFalse(butler.stored(ref2)) 

1447 # The ref we unstored should be gone according to the URI, but the 

1448 # one we forgot should still be around. 

1449 self.assertFalse(uri1.exists()) 

1450 self.assertTrue(uri2.exists()) 

1451 

1452 # Now that the collections have been pruned we can remove the 

1453 # dataset type 

1454 butler.registry.removeDatasetType(datasetType.name) 

1455 

1456 with self.assertLogs("lsst.daf.butler.registry", "INFO") as cm: 

1457 butler.registry.removeDatasetType(("test*", "test*")) 

1458 self.assertIn("not defined", "\n".join(cm.output)) 

1459 
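# A minimal sketch of the two removeRuns modes exercised above: both drop the
# RUN collections and their datasets from registry; unstore=True also deletes
# the file artifacts, while unstore=False leaves the files on disk.
def _sketch_remove_runs(butler: Butler) -> None:
    butler.removeRuns(["run1"], unstore=True)   # registry entries and files gone
    butler.removeRuns(["run2"], unstore=False)  # registry entries gone, files remain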

1460 

1461class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1462 """PosixDatastore specialization of a butler""" 

1463 

1464 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1465 fullConfigKey: str | None = ".datastore.formatters" 

1466 validationCanFail = True 

1467 datastoreStr = ["/tmp"] 

1468 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1469 registryStr = "/gen3.sqlite3" 

1470 

1471 def testPathConstructor(self) -> None: 

1472 """Independent test of constructor using PathLike.""" 

1473 butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) 

1474 self.assertIsInstance(butler, Butler) 

1475 

1476 # And again with a Path object with the butler yaml 

1477 path = pathlib.Path(self.tmpConfigFile) 

1478 butler = Butler.from_config(path, writeable=False) 

1479 self.assertIsInstance(butler, Butler) 

1480 

1481 # And again with a Path object without the butler yaml 

1482 # (making sure we skip it if the tmp config doesn't end 

1483 # in butler.yaml -- which is the case for a subclass) 

1484 if self.tmpConfigFile.endswith("butler.yaml"): 

1485 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1486 butler = Butler.from_config(path, writeable=False) 

1487 self.assertIsInstance(butler, Butler) 

1488 

1489 def testExportTransferCopy(self) -> None: 

1490 """Test local export using all transfer modes""" 

1491 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1492 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1493 # Test that the repo actually has at least one dataset. 

1494 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1495 self.assertGreater(len(datasets), 0) 

1496 uris = [exportButler.getURI(d) for d in datasets] 

1497 assert isinstance(exportButler._datastore, FileDatastore) 

1498 datastoreRoot = exportButler.get_datastore_roots()[exportButler.get_datastore_names()[0]] 

1499 

1500 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1501 

1502 for path in pathsInStore: 

1503 # Assume local file system 

1504 assert path is not None 

1505 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1506 

1507 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1508 with safeTestTempDir(TESTDIR) as exportDir: 

1509 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1510 export.saveDatasets(datasets) 

1511 for path in pathsInStore: 

1512 assert path is not None 

1513 self.assertTrue( 

1514 self.checkFileExists(exportDir, path), 

1515 f"Check that mode {transfer} exported files", 

1516 ) 

1517 

1518 def testPruneDatasets(self) -> None: 

1519 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1520 butler = Butler.from_config(self.tmpConfigFile, writeable=True) 

1521 assert isinstance(butler._datastore, FileDatastore) 

1522 # Load registry data with dimensions to hang datasets off of. 

1523 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1524 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1525 # Add some RUN-type collections. 

1526 run1 = "run1" 

1527 butler.registry.registerRun(run1) 

1528 run2 = "run2" 

1529 butler.registry.registerRun(run2) 

1530 # Put some datasets. ref1 and ref2 have the same data ID but are in 

1531 # different runs; ref3 has a different data ID. 

1532 metric = makeExampleMetrics() 

1533 dimensions = butler.dimensions.conform(["instrument", "physical_filter"]) 

1534 datasetType = self.addDatasetType( 

1535 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1536 ) 

1537 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1538 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1539 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1540 

1541 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1542 for ref, stored in many_stored.items(): 

1543 self.assertTrue(stored, f"Ref {ref} should be stored") 

1544 

1545 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1546 for ref, exists in many_exists.items(): 

1547 self.assertTrue(exists, f"Checking ref {ref} exists.") 

1548 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored") 

1549 

1550 # Simple prune. 

1551 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1552 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1)) 

1553 

1554 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1555 for ref, stored in many_stored.items(): 

1556 self.assertFalse(stored, f"Ref {ref} should not be stored") 

1557 

1558 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1559 for ref, exists in many_exists.items(): 

1560 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored") 

1561 

1562 # Put data back. 

1563 ref1_new = butler.put(metric, ref1) 

1564 self.assertEqual(ref1_new, ref1) # Reuses original ID. 

1565 ref2 = butler.put(metric, ref2) 

1566 

1567 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1568 self.assertTrue(many_stored[ref1]) 

1569 self.assertTrue(many_stored[ref2]) 

1570 self.assertFalse(many_stored[ref3]) 

1571 

1572 ref3 = butler.put(metric, ref3) 

1573 

1574 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1575 for ref, exists in many_exists.items(): 

1576 self.assertTrue(exists, f"Ref {ref} should be stored") 

1577 

1578 # Clear out the datasets from registry and start again. 

1579 refs = [ref1, ref2, ref3] 

1580 butler.pruneDatasets(refs, purge=True, unstore=True) 

1581 for ref in refs: 

1582 butler.put(metric, ref) 

1583 

1584 # Confirm we can retrieve deferred. 

1585 dref1 = butler.getDeferred(ref1) # known and exists 

1586 metric1 = dref1.get() 

1587 self.assertEqual(metric1, metric) 

1588 

1589 # Test different forms of file availability. 

1590 # Need to be in a state where: 

1591 # - one ref just has registry record. 

1592 # - one ref has a missing file but a datastore record. 

1593 # - one ref has a missing datastore record but file is there. 

1594 # - one ref does not exist anywhere. 

1595 # Do not need to test a ref that has everything since that is tested 

1596 # above. 

1597 ref0 = DatasetRef( 

1598 datasetType, 

1599 DataCoordinate.standardize( 

1600 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions 

1601 ), 

1602 run=run1, 

1603 ) 

1604 

1605 # Delete from datastore and retain in Registry. 

1606 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False) 

1607 

1608 # File has been removed. 

1609 uri2 = butler.getURI(ref2) 

1610 uri2.remove() 

1611 

1612 # Datastore has lost track. 

1613 butler._datastore.forget([ref3]) 

1614 

1615 # First test with a standard butler. 

1616 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1617 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1618 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1619 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1620 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED) 

1621 

1622 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False) 

1623 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1624 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1625 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN) 

1626 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1627 self.assertTrue(exists_many[ref2]) 

1628 

1629 # Check that per-ref query gives the same answer as many query. 

1630 for ref, exists in exists_many.items(): 

1631 self.assertEqual(butler.exists(ref, full_check=False), exists) 

1632 

1633 # getDeferred checks for existence before it allows the dataset 

1634 # to be retrieved. 

1635 with self.assertRaises(LookupError): 

1636 butler.getDeferred(ref3) # not known, file exists 

1637 dref2 = butler.getDeferred(ref2) # known but file missing 

1638 with self.assertRaises(FileNotFoundError): 

1639 dref2.get() 

1640 

1641 # Test again with a trusting butler. 

1642 butler._datastore.trustGetRequest = True 

1643 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1644 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1645 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1646 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1647 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT) 

1648 

1649 # When trusting we can get a deferred dataset handle that is not 

1650 # known but does exist. 

1651 dref3 = butler.getDeferred(ref3) 

1652 metric3 = dref3.get() 

1653 self.assertEqual(metric3, metric) 

1654 

1655 # Check that per-ref query gives the same answer as many query. 

1656 for ref, exists in exists_many.items(): 

1657 self.assertEqual(butler.exists(ref, full_check=True), exists) 

1658 

1659 # Create a ref that reuses the UUID of an existing ref but is 

1660 # otherwise not the same dataset. 

1661 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id) 

1662 with self.assertRaises(ValueError): 

1663 butler.exists(ref_bad) 

1664 

1665 # Create a ref that has a compatible storage class. 

1666 ref_compat = ref2.overrideStorageClass("StructuredDataDict") 

1667 exists = butler.exists(ref_compat) 

1668 self.assertEqual(exists, exists_many[ref2]) 

1669 

1670 # Remove everything and start from scratch. 

1671 butler._datastore.trustGetRequest = False 

1672 butler.pruneDatasets(refs, purge=True, unstore=True) 

1673 for ref in refs: 

1674 butler.put(metric, ref) 

1675 

1676 # These tests mess directly with the trash table and can leave the 

1677 # datastore in an odd state. Do them at the end. 

1678 # Check that in normal mode, deleting the record will lead to 

1679 # trash not touching the file. 

1680 uri1 = butler.getURI(ref1) 

1681 butler._datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1682 butler._datastore.forget([ref1]) 

1683 butler._datastore.trash(ref1) 

1684 butler._datastore.emptyTrash() 

1685 self.assertTrue(uri1.exists()) 

1686 uri1.remove() # Clean it up. 

1687 

1688 # Simulate execution butler setup by deleting the datastore 

1689 # record but keeping the file around and trusting. 

1690 butler._datastore.trustGetRequest = True 

1691 uris = butler.get_many_uris([ref2, ref3]) 

1692 uri2 = uris[ref2].primaryURI 

1693 uri3 = uris[ref3].primaryURI 

1694 self.assertTrue(uri2.exists()) 

1695 self.assertTrue(uri3.exists()) 

1696 

1697 # Remove the datastore record. 

1698 butler._datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1699 butler._datastore.forget([ref2]) 

1700 self.assertTrue(uri2.exists()) 

1701 butler._datastore.trash([ref2, ref3]) 

1702 # Immediate removal for ref2 file 

1703 self.assertFalse(uri2.exists()) 

1704 # But ref3 has to wait for the empty. 

1705 self.assertTrue(uri3.exists()) 

1706 butler._datastore.emptyTrash() 

1707 self.assertFalse(uri3.exists()) 

1708 

1709 # Clear out the datasets from registry. 

1710 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1711 
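# A minimal sketch of the DatasetExistence flag checks from testPruneDatasets.
# _exists_many is the private helper these tests use; full_check=True verifies
# the artifact itself, while full_check=False trusts the records and adds the
# _ASSUMED flag.
def _sketch_existence_flags(butler: Butler, refs: list[DatasetRef]) -> None:
    for ref, exists in butler._exists_many(refs, full_check=True).items():
        if exists == DatasetExistence.VERIFIED:
            print(f"{ref}: registry, datastore and artifact all agree")
        elif exists & DatasetExistence.RECORDED:
            print(f"{ref}: known to registry but incomplete ({exists})")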

1712 def testPytypeCoercion(self) -> None: 

1713 """Test python type coercion on Butler.get and put.""" 

1714 # Store some data with the normal example storage class. 

1715 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1716 datasetTypeName = "test_metric" 

1717 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1718 

1719 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1720 metric = butler.get(datasetTypeName, dataId=dataId) 

1721 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1722 

1723 datasetType_ori = butler.get_dataset_type(datasetTypeName) 

1724 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1725 

1726 # Now we need to hack the registry dataset type definition. 

1727 # There is no public API for this. 

1728 assert isinstance(butler._registry, SqlRegistry) 

1729 manager = butler._registry._managers.datasets 

1730 assert hasattr(manager, "_db") and hasattr(manager, "_static") 

1731 manager._db.update( 

1732 manager._static.dataset_type, 

1733 {"name": datasetTypeName}, 

1734 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1735 ) 

1736 

1737 # Force reset of dataset type cache 

1738 butler.registry.refresh() 

1739 

1740 datasetType_new = butler.get_dataset_type(datasetTypeName) 

1741 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1742 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1743 

1744 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1745 self.assertNotEqual(type(metric_model), type(metric)) 

1746 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1747 

1748 # Put the model and read it back to show that everything now 

1749 # works as normal. 

1750 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1751 metric_model_new = butler.get(metric_ref) 

1752 self.assertEqual(metric_model_new, metric_model) 

1753 

1754 # Hack the storage class again to something that will fail on the 

1755 # get with no conversion class. 

1756 manager._db.update( 

1757 manager._static.dataset_type, 

1758 {"name": datasetTypeName}, 

1759 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1760 ) 

1761 butler.registry.refresh() 

1762 

1763 with self.assertRaises(ValueError): 

1764 butler.get(datasetTypeName, dataId=dataId) 

1765 
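# A minimal sketch of the storage-class coercion idea behind testPytypeCoercion:
# overriding the storage class on a ref (here with the compatible
# StructuredDataDict class used elsewhere in these tests) converts the
# in-memory python type on get() without touching the stored artifact.
def _sketch_storage_class_override(butler: Butler, ref: DatasetRef) -> Any:
    return butler.get(ref.overrideStorageClass("StructuredDataDict"))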

1766 

1767@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1768class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1769 """PosixDatastore specialization of a butler using Postgres""" 

1770 

1771 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1772 fullConfigKey = ".datastore.formatters" 

1773 validationCanFail = True 

1774 datastoreStr = ["/tmp"] 

1775 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1776 registryStr = "PostgreSQL@test" 

1777 postgresql: Any 

1778 

1779 @staticmethod 

1780 def _handler(postgresql: Any) -> None: 

1781 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1782 with engine.begin() as connection: 

1783 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1784 

1785 @classmethod 

1786 def setUpClass(cls) -> None: 

1787 # Create the postgres test server. 

1788 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1789 cache_initialized_db=True, on_initialized=cls._handler 

1790 ) 

1791 super().setUpClass() 

1792 

1793 @classmethod 

1794 def tearDownClass(cls) -> None: 

1795 # Clean up any lingering SQLAlchemy engines/connections 

1796 # so they're closed before we shut down the server. 

1797 gc.collect() 

1798 cls.postgresql.clear_cache() 

1799 super().tearDownClass() 

1800 

1801 def setUp(self) -> None: 

1802 self.server = self.postgresql() 

1803 

1804 # Need to add a registry section to the config. 

1805 self._temp_config = False 

1806 config = Config(self.configFile) 

1807 config["registry", "db"] = self.server.url() 

1808 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1809 config.dump(fh) 

1810 self.configFile = fh.name 

1811 self._temp_config = True 

1812 super().setUp() 

1813 

1814 def tearDown(self) -> None: 

1815 self.server.stop() 

1816 if self._temp_config and os.path.exists(self.configFile): 

1817 os.remove(self.configFile) 

1818 super().tearDown() 

1819 

1820 def testMakeRepo(self) -> None: 

1821 # The base class test assumes that it's using SQLite and that 

1822 # the config file is acceptable to SQLite. 

1823 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1824 
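# A minimal sketch of the testing.postgresql pattern used by the class above,
# assuming testing.postgresql and a PostgreSQL server are installed: a cached
# factory starts a throwaway server whose URL can be handed straight to
# SQLAlchemy; clear_cache() removes the cached template database.
def _sketch_postgres_factory() -> None:
    factory = testing.postgresql.PostgresqlFactory(cache_initialized_db=True)
    server = factory()
    engine = sqlalchemy.engine.create_engine(server.url())
    engine.dispose()
    server.stop()
    factory.clear_cache()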

1825 

1826class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1827 """InMemoryDatastore specialization of a butler""" 

1828 

1829 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1830 fullConfigKey = None 

1831 useTempRoot = False 

1832 validationCanFail = False 

1833 datastoreStr = ["datastore='InMemory"] 

1834 datastoreName = ["InMemoryDatastore@"] 

1835 registryStr = "/gen3.sqlite3" 

1836 

1837 def testIngest(self) -> None: 

1838 pass 

1839 

1840 

1841class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1842 """PosixDatastore specialization""" 

1843 

1844 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1845 fullConfigKey = ".datastore.datastores.1.formatters" 

1846 validationCanFail = True 

1847 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1848 datastoreName = [ 

1849 "InMemoryDatastore@", 

1850 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1851 "SecondDatastore", 

1852 ] 

1853 registryStr = "/gen3.sqlite3" 

1854 

1855 

1856class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1857 """Test that a yaml file in one location can refer to a root in another.""" 

1858 

1859 datastoreStr = ["dir1"] 

1860 # Disable the makeRepo test since we are deliberately not using 

1861 # butler.yaml as the config name. 

1862 fullConfigKey = None 

1863 

1864 def setUp(self) -> None: 

1865 self.root = makeTestTempDir(TESTDIR) 

1866 

1867 # Make a new repository in one place 

1868 self.dir1 = os.path.join(self.root, "dir1") 

1869 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1870 

1871 # Move the yaml file to a different place and add a "root" 

1872 self.dir2 = os.path.join(self.root, "dir2") 

1873 os.makedirs(self.dir2, exist_ok=True) 

1874 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1875 config = Config(configFile1) 

1876 config["root"] = self.dir1 

1877 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1878 config.dumpToUri(configFile2) 

1879 os.remove(configFile1) 

1880 self.tmpConfigFile = configFile2 

1881 

1882 def testFileLocations(self) -> None: 

1883 self.assertNotEqual(self.dir1, self.dir2) 

1884 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1885 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1886 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1887 

1888 

1889class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1890 """Test that a config file created by makeRepo outside of repo works.""" 

1891 

1892 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1893 

1894 def setUp(self) -> None: 

1895 self.root = makeTestTempDir(TESTDIR) 

1896 self.root2 = makeTestTempDir(TESTDIR) 

1897 

1898 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1899 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1900 

1901 def tearDown(self) -> None: 

1902 if os.path.exists(self.root2): 

1903 shutil.rmtree(self.root2, ignore_errors=True) 

1904 super().tearDown() 

1905 

1906 def testConfigExistence(self) -> None: 

1907 c = Config(self.tmpConfigFile) 

1908 uri_config = ResourcePath(c["root"]) 

1909 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1910 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1911 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1912 

1913 def testPutGet(self) -> None: 

1914 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1915 self.runPutGetTest(storageClass, "test_metric") 

1916 

1917 

1918class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1919 """Test that a config file created by makeRepo outside of repo works.""" 

1920 

1921 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1922 

1923 def setUp(self) -> None: 

1924 self.root = makeTestTempDir(TESTDIR) 

1925 self.root2 = makeTestTempDir(TESTDIR) 

1926 

1927 self.tmpConfigFile = self.root2 

1928 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1929 

1930 def testConfigExistence(self) -> None: 

1931 # Append the yaml file name, else the Config constructor does not 

1932 # know the file type. 

1933 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1934 super().testConfigExistence() 

1935 

1936 

1937class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1938 """Test that a config file created by makeRepo outside of repo works.""" 

1939 

1940 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1941 

1942 def setUp(self) -> None: 

1943 self.root = makeTestTempDir(TESTDIR) 

1944 self.root2 = makeTestTempDir(TESTDIR) 

1945 

1946 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1947 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1948 

1949 

1950@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1951class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1952 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1953 a local in-memory SqlRegistry. 

1954 """ 

1955 

1956 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1957 fullConfigKey = None 

1958 validationCanFail = True 

1959 

1960 bucketName = "anybucketname" 

1961 """Name of the Bucket that will be used in the tests. The name is read from 

1962 the config file used with the tests during set-up. 

1963 """ 

1964 

1965 root = "butlerRoot/" 

1966 """Root repository directory expected to be used in case useTempRoot=False. 

1967 Otherwise the root is set to a 20 characters long randomly generated string 

1968 during set-up. 

1969 """ 

1970 

1971 datastoreStr = [f"datastore={root}"] 

1972 """Contains all expected root locations in a format expected to be 

1973 returned by Butler stringification. 

1974 """ 

1975 

1976 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1977 """The expected format of the S3 Datastore string.""" 

1978 

1979 registryStr = "/gen3.sqlite3" 

1980 """Expected format of the Registry string.""" 

1981 

1982 mock_s3 = mock_s3() 

1983 """The mocked s3 interface from moto.""" 

1984 

1985 def genRoot(self) -> str: 

1986 """Return a random string of len 20 to serve as a root 

1987 name for the temporary bucket repo. 

1988 

1989 This is equivalent to tempfile.mkdtemp as this is what self.root 

1990 becomes when useTempRoot is True. 

1991 """ 

1992 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1993 return rndstr + "/" 

1994 

1995 def setUp(self) -> None: 

1996 config = Config(self.configFile) 

1997 uri = ResourcePath(config[".datastore.datastore.root"]) 

1998 self.bucketName = uri.netloc 

1999 

2000 # Enable S3 mocking of tests. 

2001 self.enterContext(clean_test_environment_for_s3()) 

2002 self.mock_s3.start() 

2003 

2004 if self.useTempRoot: 

2005 self.root = self.genRoot() 

2006 rooturi = f"s3://{self.bucketName}/{self.root}" 

2007 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

2008 

2009 # Need a local folder to store the registry database. 

2010 self.reg_dir = makeTestTempDir(TESTDIR) 

2011 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

2012 

2013 # Moto needs to know that we expect the bucket bucketName to exist 

2014 # (this used to be the class attribute bucketName). 

2015 s3 = boto3.resource("s3") 

2016 s3.create_bucket(Bucket=self.bucketName) 

2017 

2018 self.datastoreStr = [f"datastore='{rooturi}'"] 

2019 self.datastoreName = [f"FileDatastore@{rooturi}"] 

2020 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

2021 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

2022 

2023 def tearDown(self) -> None: 

2024 s3 = boto3.resource("s3") 

2025 bucket = s3.Bucket(self.bucketName) 

2026 try: 

2027 bucket.objects.all().delete() 

2028 except botocore.exceptions.ClientError as e: 

2029 if e.response["Error"]["Code"] == "404": 

2030 # the key was not reachable - pass 

2031 pass 

2032 else: 

2033 raise 

2034 

2035 bucket = s3.Bucket(self.bucketName) 

2036 bucket.delete() 

2037 

2038 # Stop the S3 mock. 

2039 self.mock_s3.stop() 

2040 

2041 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

2042 shutil.rmtree(self.reg_dir, ignore_errors=True) 

2043 

2044 if self.useTempRoot and os.path.exists(self.root): 

2045 shutil.rmtree(self.root, ignore_errors=True) 

2046 

2047 super().tearDown() 

2048 
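# A minimal sketch of the moto idiom used by S3DatastoreButlerTestCase,
# assuming boto3 and moto are installed: inside mock_s3, all boto3 calls hit
# an in-process fake, so buckets can be created and destroyed without AWS
# credentials. The bucket name here is illustrative.
def _sketch_mock_bucket() -> None:
    with mock_s3():
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket="anybucketname")
        s3.Bucket("anybucketname").delete()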

2049 

2050class PosixDatastoreTransfers(unittest.TestCase): 

2051 """Test data transfers between butlers. 

2052 

2053 Test for different managers. UUID to UUID and integer to integer are 

2054 tested. UUID to integer is not supported since we do not currently 

2055 want to allow that. Integer to UUID is supported with the caveat 

2056 that a UUID4 will be generated, which will be incorrect for raw 

2057 dataset types. The test ignores that. 

2058 """ 

2059 

2060 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2061 storageClassFactory: StorageClassFactory 

2062 

2063 @classmethod 

2064 def setUpClass(cls) -> None: 

2065 cls.storageClassFactory = StorageClassFactory() 

2066 cls.storageClassFactory.addFromConfig(cls.configFile) 

2067 

2068 def setUp(self) -> None: 

2069 self.root = makeTestTempDir(TESTDIR) 

2070 self.config = Config(self.configFile) 

2071 

2072 def tearDown(self) -> None: 

2073 removeTestTempDir(self.root) 

2074 

2075 def create_butler(self, manager: str, label: str) -> Butler: 

2076 config = Config(self.configFile) 

2077 config["registry", "managers", "datasets"] = manager 

2078 return Butler.from_config( 

2079 Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True 

2080 ) 

2081 

2082 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None: 

2083 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID" 

2084 if manager1 is None: 

2085 manager1 = default 

2086 if manager2 is None: 

2087 manager2 = default 

2088 self.source_butler = self.create_butler(manager1, "1") 

2089 self.target_butler = self.create_butler(manager2, "2") 

2090 

2091 def testTransferUuidToUuid(self) -> None: 

2092 self.create_butlers() 

2093 self.assertButlerTransfers() 

2094 

2095 def _enable_trust(self, datastore: Datastore) -> None: 

2096 datastores = getattr(datastore, "datastores", [datastore]) 

2097 for this_datastore in datastores: 

2098 if hasattr(this_datastore, "trustGetRequest"): 

2099 this_datastore.trustGetRequest = True 

2100 

2101 def testTransferMissing(self) -> None: 

2102 """Test transfers where datastore records are missing. 

2103 

2104 This is how execution butler works. 

2105 """ 

2106 self.create_butlers() 

2107 

2108 # Configure the source butler to allow trust. 

2109 self._enable_trust(self.source_butler._datastore) 

2110 

2111 self.assertButlerTransfers(purge=True) 

2112 

2113 def testTransferMissingDisassembly(self) -> None: 

2114 """Test transfers where datastore records are missing. 

2115 

2116 This is how execution butler works. 

2117 """ 

2118 self.create_butlers() 

2119 

2120 # Configure the source butler to allow trust. 

2121 self._enable_trust(self.source_butler._datastore) 

2122 

2123 # Test disassembly. 

2124 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2125 

2126 def testAbsoluteURITransferDirect(self) -> None: 

2127 """Test transfer using an absolute URI.""" 

2128 self._absolute_transfer("auto") 

2129 

2130 def testAbsoluteURITransferCopy(self) -> None: 

2131 """Test transfer using an absolute URI.""" 

2132 self._absolute_transfer("copy") 

2133 

2134 def _absolute_transfer(self, transfer: str) -> None: 

2135 self.create_butlers() 

2136 

2137 storageClassName = "StructuredData" 

2138 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2139 datasetTypeName = "random_data" 

2140 run = "run1" 

2141 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2142 

2143 dimensions = self.source_butler.dimensions.conform(()) 

2144 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2145 self.source_butler.registry.registerDatasetType(datasetType) 

2146 

2147 metrics = makeExampleMetrics() 

2148 with ResourcePath.temporary_uri(suffix=".json") as temp: 

2149 dataId = DataCoordinate.make_empty(self.source_butler.dimensions) 

2150 source_refs = [DatasetRef(datasetType, dataId, run=run)] 

2151 temp.write(json.dumps(metrics.exportAsDict()).encode()) 

2152 dataset = FileDataset(path=temp, refs=source_refs) 

2153 self.source_butler.ingest(dataset, transfer="direct") 

2154 

2155 self.target_butler.transfer_from( 

2156 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer 

2157 ) 

2158 

2159 uri = self.target_butler.getURI(dataset.refs[0]) 

2160 if transfer == "auto": 

2161 self.assertEqual(uri, temp) 

2162 else: 

2163 self.assertNotEqual(uri, temp) 

2164 
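# A minimal sketch of the butler-to-butler transfer exercised below:
# register_dataset_types and transfer_dimensions let the target register
# whatever definitions it is missing before the datasets are copied.
def _sketch_transfer_from(source: Butler, target: Butler, refs: list[DatasetRef]) -> None:
    transferred = target.transfer_from(
        source,
        refs,
        transfer="auto",
        register_dataset_types=True,
        transfer_dimensions=True,
    )
    assert len(transferred) == len(refs)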

2165 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None: 

2166 """Test that a run can be transferred to another butler.""" 

2167 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2168 datasetTypeName = "random_data" 

2169 

2170 # Test will create 3 collections and we will want to transfer 

2171 # two of those three. 

2172 runs = ["run1", "run2", "other"] 

2173 

2174 # Also want to use two different dataset types to ensure that 

2175 # grouping works. 

2176 datasetTypeNames = ["random_data", "random_data_2"] 

2177 

2178 # Create the run collections in the source butler. 

2179 for run in runs: 

2180 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2181 

2182 # Create dimensions in source butler. 

2183 n_exposures = 30 

2184 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2185 self.source_butler.registry.insertDimensionData( 

2186 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2187 ) 

2188 self.source_butler.registry.insertDimensionData( 

2189 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2190 ) 

2191 

2192 for i in range(n_exposures): 

2193 self.source_butler.registry.insertDimensionData( 

2194 "exposure", 

2195 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2196 ) 

2197 

2198 # Create dataset types in the source butler. 

2199 dimensions = self.source_butler.dimensions.conform(["instrument", "exposure"]) 

2200 for datasetTypeName in datasetTypeNames: 

2201 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2202 self.source_butler.registry.registerDatasetType(datasetType) 

2203 

2204 # Write a dataset to an unrelated run -- this will ensure that 

2205 # we are rewriting integer dataset IDs in the target if necessary. 

2206 # This is not relevant for UUID-based managers. 

2207 run = "distraction" 

2208 butler = Butler.from_config(butler=self.source_butler, run=run) 

2209 butler.put( 

2210 makeExampleMetrics(), 

2211 datasetTypeName, 

2212 exposure=1, 

2213 instrument="DummyCamComp", 

2214 physical_filter="d-r", 

2215 ) 

2216 

2217 # Write some example metrics to the source 

2218 butler = Butler.from_config(butler=self.source_butler) 

2219 

2220 # Set of DatasetRefs that should be in the list of refs to transfer 

2221 # but which will not be transferred. 

2222 deleted: set[DatasetRef] = set() 

2223 

2224 n_expected = 20 # Number of datasets expected to be transferred 

2225 source_refs = [] 

2226 for i in range(n_exposures): 

2227 # Put a third of datasets into each collection, only retain 

2228 # two thirds. 

2229 index = i % 3 

2230 run = runs[index] 

2231 datasetTypeName = datasetTypeNames[i % 2] 

2232 

2233 metric = MetricsExample( 

2234 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)] 

2235 ) 

2236 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2237 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2238 

2239 # Remove the datastore record using low-level API, but only 

2240 # for a specific index. 

2241 if purge and index == 1: 

2242 # For one of these delete the file as well. 

2243 # This allows the "missing" code to filter the 

2244 # file out. 

2245 # Access the individual datastores. 

2246 datastores = [] 

2247 if hasattr(butler._datastore, "datastores"): 

2248 datastores.extend(butler._datastore.datastores) 

2249 else: 

2250 datastores.append(butler._datastore) 

2251 

2252 if not deleted: 

2253 # For a chained datastore we need to remove 

2254 # files in each chain. 

2255 for datastore in datastores: 

2256 # The file might not be known to the datastore 

2257 # if constraints are used. 

2258 try: 

2259 primary, uris = datastore.getURIs(ref) 

2260 except FileNotFoundError: 

2261 continue 

2262 if primary and primary.scheme != "mem": 

2263 primary.remove() 

2264 for uri in uris.values(): 

2265 if uri.scheme != "mem": 

2266 uri.remove() 

2267 n_expected -= 1 

2268 deleted.add(ref) 

2269 

2270 # Remove the datastore record. 

2271 for datastore in datastores: 

2272 if hasattr(datastore, "removeStoredItemInfo"): 

2273 datastore.removeStoredItemInfo(ref) 

2274 

2275 if index < 2: 

2276 source_refs.append(ref) 

2277 if ref not in deleted: 

2278 new_metric = butler.get(ref) 

2279 self.assertEqual(new_metric, metric) 

2280 

2281 # Create some bad dataset types to ensure we check for inconsistent 

2282 # definitions. 

2283 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2284 for datasetTypeName in datasetTypeNames: 

2285 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2286 self.target_butler.registry.registerDatasetType(datasetType) 

2287 with self.assertRaises(ConflictingDefinitionError) as cm: 

2288 self.target_butler.transfer_from(self.source_butler, source_refs) 

2289 self.assertIn("dataset type differs", str(cm.exception)) 

2290 

2291 # And remove the bad definitions. 

2292 for datasetTypeName in datasetTypeNames: 

2293 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2294 

2295 # Transfer without creating dataset types should fail. 

2296 with self.assertRaises(KeyError): 

2297 self.target_butler.transfer_from(self.source_butler, source_refs) 

2298 

2299 # Transfer without creating dimensions should fail. 

2300 with self.assertRaises(ConflictingDefinitionError) as cm: 

2301 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True) 

2302 self.assertIn("dimension", str(cm.exception)) 

2303 

2304 # The failed transfer above leaves registry in an inconsistent 

2305 # state because the run is created but then rolled back without 

2306 # the collection cache being cleared. For now force a refresh. 

2307 # Can remove with DM-35498. 

2308 self.target_butler.registry.refresh() 

2309 

2310 # Transfer the records for one ref to test the alternative API. 

2311 with self.assertLogs(logger="lsst", level=logging.DEBUG) as log_cm: 

2312 self.target_butler.transfer_dimension_records_from(self.source_butler, [source_refs[0]]) 

2313 self.assertIn("number of records transferred: 1", ";".join(log_cm.output)) 

2314 

2315 # Now transfer them to the second butler, including dimensions. 

2316 with self.assertLogs(logger="lsst", level=logging.DEBUG) as log_cm: 

2317 transferred = self.target_butler.transfer_from( 

2318 self.source_butler, 

2319 source_refs, 

2320 register_dataset_types=True, 

2321 transfer_dimensions=True, 

2322 ) 

2323 self.assertEqual(len(transferred), n_expected) 

2324 log_output = ";".join(log_cm.output) 

2325 

2326 # A ChainedDatastore will use the in-memory datastore for mexists 

2327 # so we cannot rely on the mexists log message. 

2328 self.assertIn("Number of datastore records found in source", log_output) 

2329 self.assertIn("Creating output run", log_output) 

2330 

2331 # Do the transfer twice to ensure that it will do nothing extra. 

2332 # Only do this if purge=True because it does not work for int 

2333 # dataset_id. 

2334 if purge: 

2335 # This should not need to register dataset types. 

2336 transferred = self.target_butler.transfer_from(self.source_butler, source_refs) 

2337 self.assertEqual(len(transferred), n_expected) 

2338 

2339 # Also do an explicit low-level transfer to trigger some 

2340 # edge cases. 

2341 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2342 self.target_butler._datastore.transfer_from(self.source_butler._datastore, source_refs) 

2343 log_output = ";".join(log_cm.output) 

2344 self.assertIn("no file artifacts exist", log_output) 

2345 

2346 with self.assertRaises((TypeError, AttributeError)): 

2347 self.target_butler._datastore.transfer_from(self.source_butler, source_refs) # type: ignore 

2348 

2349 with self.assertRaises(ValueError): 

2350 self.target_butler._datastore.transfer_from( 

2351 self.source_butler._datastore, source_refs, transfer="split" 

2352 ) 

2353 

2354 # Now try to get the same refs from the new butler. 

2355 for ref in source_refs: 

2356 if ref not in deleted: 

2357 new_metric = self.target_butler.get(ref) 

2358 old_metric = self.source_butler.get(ref) 

2359 self.assertEqual(new_metric, old_metric) 

2360 

2361 # Now prune run2 collection and create instead a CHAINED collection. 

2362 # This should block the transfer. 

2363 self.target_butler.removeRuns(["run2"], unstore=True) 

2364 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2365 with self.assertRaises(CollectionTypeError): 

2366 # Re-importing the run1 datasets can be problematic if they 

2367 # use integer IDs so filter those out. 

2368 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2369 self.target_butler.transfer_from(self.source_butler, to_transfer) 

2370 

2371 

2372class ChainedDatastoreTransfers(PosixDatastoreTransfers): 

2373 """Test transfers using a chained datastore.""" 

2374 

2375 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2376 

2377 

2378class NullDatastoreTestCase(unittest.TestCase): 

2379 """Test that we can fall back to a null datastore.""" 

2380 

2381 # Need a good config to create the repo. 

2382 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2383 storageClassFactory: StorageClassFactory 

2384 

2385 @classmethod 

2386 def setUpClass(cls) -> None: 

2387 cls.storageClassFactory = StorageClassFactory() 

2388 cls.storageClassFactory.addFromConfig(cls.configFile) 

2389 

2390 def setUp(self) -> None: 

2391 """Create a new butler root for each test.""" 

2392 self.root = makeTestTempDir(TESTDIR) 

2393 Butler.makeRepo(self.root, config=Config(self.configFile)) 

2394 

2395 def tearDown(self) -> None: 

2396 removeTestTempDir(self.root) 

2397 

2398 def test_fallback(self) -> None: 

2399 # Read the butler config and mess with the datastore section. 

2400 config_path = os.path.join(self.root, "butler.yaml") 

2401 bad_config = Config(config_path) 

2402 bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore" 

2403 bad_config.dumpToUri(config_path) 

2404 

2405 with self.assertRaises(RuntimeError): 

2406 Butler(self.root, without_datastore=False) 

2407 

2408 with self.assertRaises(RuntimeError): 

2409 Butler.from_config(self.root, without_datastore=False) 

2410 

2411 butler = Butler.from_config(self.root, writeable=True, without_datastore=True) 

2412 self.assertIsInstance(butler._datastore, NullDatastore) 

2413 

2414 # Check that registry is working. 

2415 butler.registry.registerRun("MYRUN") 

2416 collections = butler.registry.queryCollections(...) 

2417 self.assertIn("MYRUN", set(collections)) 

2418 

2419 # Create a ref. 

2420 dimensions = butler.dimensions.conform([]) 

2421 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

2422 datasetTypeName = "metric" 

2423 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2424 butler.registry.registerDatasetType(datasetType) 

2425 ref = DatasetRef(datasetType, {}, run="MYRUN") 

2426 

2427 # Check that datastore will complain. 

2428 with self.assertRaises(FileNotFoundError): 

2429 butler.get(ref) 

2430 with self.assertRaises(FileNotFoundError): 

2431 butler.getURI(ref) 

2432 

2433 
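# A minimal sketch of the fallback tested above: without_datastore=True swaps
# in NullDatastore, so registry operations succeed while any artifact access
# raises FileNotFoundError.
def _sketch_null_datastore(repo_root: str) -> None:
    butler = Butler.from_config(repo_root, writeable=True, without_datastore=True)
    assert isinstance(butler._datastore, NullDatastore)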

2434def setup_module(module: types.ModuleType) -> None: 

2435 """Set up the module for pytest.""" 

2436 clean_environment() 

2437 

2438 

2439if __name__ == "__main__": 

2440 clean_environment() 

2441 unittest.main()