Coverage for tests/test_butler.py: 13% of 1304 statements (coverage.py v7.3.2, created 2023-10-25 15:14 +0000)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

from __future__ import annotations

import gc
import json
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock
import uuid
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, cast

try:
    import boto3
    import botocore
    from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
    from moto import mock_s3  # type: ignore[import]
except ImportError:
    boto3 = None

    def mock_s3(*args: Any, **kwargs: Any) -> Any:  # type: ignore[no-untyped-def]
        """No-op decorator in case moto's mock_s3 cannot be imported."""
        return None
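
# A minimal sketch of how this fallback is used (the decorator stacking is
# illustrative): with moto installed, mock_s3 patches boto3 so S3 datastore
# tests run against an in-memory bucket; without it, the no-op definition
# above keeps class decoration from failing at import time while the tests
# themselves are skipped.
#
#     @unittest.skipIf(boto3 is None, "moto/boto3 not available")
#     @mock_s3
#     class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
#         ...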

try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    ButlerRepoIndex,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetExistence,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    NullDatastore,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    import types

    from lsst.daf.butler import Datastore, DimensionGraph, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def clean_environment() -> None:
    """Remove external environment variables that affect the tests."""
    for k in (
        "DAF_BUTLER_REPOSITORY_INDEX",
        "S3_ENDPOINT_URL",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_SHARED_CREDENTIALS_FILE",
    ):
        os.environ.pop(k, None)
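
# Tests that need one of these variables set temporarily (e.g. the repository
# index checks in testConstructor below) patch the environment rather than
# mutating it directly, along these lines:
#
#     with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
#         ...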


def makeExampleMetrics() -> MetricsExample:
    """Return an example dataset suitable for tests."""
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )
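
# Usage sketch: the three positional arguments become the .summary, .output
# and .data attributes of the returned object (assuming the MetricsExample
# test helper keeps that layout), which the component tests below rely on:
#
#     metric = makeExampleMetrics()
#     assert metric.summary["AM1"] == 5.2
#     assert metric.data[:4] == [563, 234, 456.7, 752]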


class TransactionTestError(Exception):
    """Specific error for testing transactions, used to prevent the
    misdiagnosis that might otherwise occur when a standard exception is
    used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other test
    cases.
    """

    def testSearchPath(self) -> None:
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests(TestCaseMixin):
    """Helper class for running a suite of put/get tests from different
    butler configurations.
    """

    root: str
    default_run = "ingésτ😺"
    storageClassFactory: StorageClassFactory
    configFile: str
    tmpConfigFile: str

    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGraph, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(
        self,
        butler: Butler,
        datasetRef: DatasetRef,
        components: tuple[str, ...],
        reference: Any,
        collections: Any = None,
    ) -> None:
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[Butler, DatasetType]:
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run})

        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType
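
    # Usage sketch: tests call this to obtain a configured butler plus a
    # registered dataset type in one step, e.g.
    #
    #     butler, datasetType = self.create_butler(
    #         self.default_run, storageClass, "test_metric"
    #     )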

    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Butler:
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]
        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                kwargs: dict[str, Any] = {}
                if not isinstance(args[0], DatasetRef):  # type: ignore
                    kwargs["run"] = this_run
                ref = butler.put(metric, *args, **kwargs)
                self.assertIsInstance(ref, DatasetRef)

                # Test get with the resolved DatasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get with dataset type name and data ID
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler._datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)
                        assert path_in_destination is not None

                        # When the path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True)
            # Lookup with original args should still fail.
            kwargs = {"collections": this_run}
            if isinstance(args[0], DatasetRef):
                kwargs = {}  # Prevent warning from being issued.
            self.assertFalse(butler.exists(*args, **kwargs))
            # get() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.get(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know the collections are
            # empty.
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Create DatasetRef for put using default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Check that getDeferred fails with standalone ref.
        with self.assertRaises(LookupError):
            butler.getDeferred(refIn)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        assert metric.data is not None  # for mypy
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises(FileNotFoundError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add the same ref again, so we can check that duplicate put fails.
        ref = butler.put(metric, datasetType, dataId)

        # Repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with a resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # In the case of a resolved ref this write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present.

        return butler

    def testDeferredCollectionPassing(self) -> None:
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # The second time it will be allowed but indicate a no-op.
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a
        # CollectionError.
        self.assertFalse(butler.exists(datasetType, dataId))
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True
    validationCanFail: bool
    fullConfigKey: str | None
    registryStr: str | None
    datastoreName: list[str] | None
    datastoreStr: list[str]
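
    # Concrete subclasses (outside this excerpt) are expected to fill in
    # configFile and the attributes above; a hypothetical minimal example:
    #
    #     class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    #         configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
    #         fullConfigKey = ".datastore.formatters"
    #         validationCanFail = True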

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self) -> None:
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in the run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler._datastore, butler2._datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"file://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"):
                        Butler("bad_label")
                    with self.assertRaises(FileNotFoundError):
                        # Should ignore aliases.
                        Butler(ResourcePath("label", forceAbsolute=False))
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(
                        Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False)
                    )
                    self.assertIn("not known to", str(cm.exception))
                    # Should report no failure.
                    self.assertEqual(ButlerRepoIndex.get_failure_reason(), "")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with empty configuration.
                butler_index = Config()
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"):
                        Butler("label")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with bad contents.
                with open(temp_file.ospath, "w") as fh:
                    print("'", file=fh)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"):
                        Butler("label")
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())

            with self.assertRaisesRegex(FileNotFoundError, "index file not found"):
                Butler("label")

        # Check that we can create a Butler when the alias file is not found.
        butler = Butler(self.tmpConfigFile, writeable=False)
        self.assertIsInstance(butler, Butler)
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler._datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler._datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # A parameter on the write storage class should work regardless
        # of the read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the differing dataset type definition, this time
        # passing the dataset type and data ID rather than a resolved ref.
        # The behavior should be consistent with the ref-based get() and
        # return the python type of the given DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            # For the first ingest use copy.
            butler.ingest(*datasets, transfer="copy", record_validation_info=False)

            # Now try to ingest again in "execution butler" mode where
            # the registry entries exist but the datastore does not have
            # the files. We also need to strip the dimension records to ensure
            # that they will be re-added by the ingest.
            ref = datasets[0].refs[0]
            datasets[0].refs = [
                cast(
                    DatasetRef,
                    butler.registry.findDataset(ref.datasetType, dataId=ref.dataId, collections=ref.run),
                )
                for ref in datasets[0].refs
            ]
            all_refs = []
            for dataset in datasets:
                refs = []
                for ref in dataset.refs:
                    # Create a dict from the dataId to drop the records.
                    new_data_id = {str(k): v for k, v in ref.dataId.items()}
                    new_ref = butler.registry.findDataset(ref.datasetType, new_data_id, collections=ref.run)
                    assert new_ref is not None
                    self.assertFalse(new_ref.dataId.hasRecords())
                    refs.append(new_ref)
                dataset.refs = refs
                all_refs.extend(dataset.refs)
            butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False)

            # Use move mode to test that the file is deleted. Also
            # disable recording of the file size.
            butler.ingest(*datasets, transfer="move", record_validation_info=False)

            # Check that every ref now has records.
            for dataset in datasets:
                for ref in dataset.refs:
                    self.assertTrue(ref.dataId.hasRecords())

            # Ensure that the file has disappeared.
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler._datastore.getStoredItemsInfo(datasets[0].refs[0])  # type: ignore[attr-defined]
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.exists(datasetTypeName, dataId1))
        self.assertTrue(butler.exists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets.
        datasets = []
        butler.ingest(*datasets)

    def testPickle(self) -> None:
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying dataset types can still
        # return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get with the resolved ref
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get with dataset type name and data ID
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)

    def testMakeRepo(self) -> None:
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories.
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" is missing a few keys that we know "full"
        # should be inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler._datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self) -> None:
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool:
        """Check if a file exists at a given path (relative to root).

        The test testPutTemplates verifies the actual physical existence of
        the files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()
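
    # Usage sketch (the relative path is purely illustrative):
    #
    #     self.assertTrue(self.checkFileExists(self.root, "run/metric1/file.pickle"))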

    def testPutTemplates(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create three almost-identical DatasetTypes (all will use the
        # default template)
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames.
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

1311 

1312 def testImportExport(self) -> None: 

1313 # Run put/get tests just to create and populate a repo. 

1314 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1315 self.runImportExportTest(storageClass) 

1316 

1317 @unittest.expectedFailure 

1318 def testImportExportVirtualComposite(self) -> None: 

1319 # Run put/get tests just to create and populate a repo. 

1320 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

1321 self.runImportExportTest(storageClass) 

1322 

1323 def runImportExportTest(self, storageClass: StorageClass) -> None: 

1324 """Test exporting and importing. 

1325 

1326 This test does an export to a temp directory and an import back 

1327 into a new temp directory repo. It does not assume a posix datastore. 

1328 """ 

1329 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1330 

1331 # Test that we must have a file extension. 

1332 with self.assertRaises(ValueError): 

1333 with exportButler.export(filename="dump", directory=".") as export: 

1334 pass 

1335 

1336 # Test that unknown format is not allowed. 

1337 with self.assertRaises(ValueError): 

1338 with exportButler.export(filename="dump.fits", directory=".") as export: 

1339 pass 

1340 

1341 # Test that the repo actually has at least one dataset. 

1342 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1343 self.assertGreater(len(datasets), 0) 

1344 # Add a DimensionRecord that's unused by those datasets. 

1345 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")} 

1346 exportButler.registry.insertDimensionData("skymap", skymapRecord) 

1347 # Export and then import datasets. 

1348 with safeTestTempDir(TESTDIR) as exportDir: 

1349 exportFile = os.path.join(exportDir, "exports.yaml") 

1350 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export: 

1351 export.saveDatasets(datasets) 

1352 # Export the same datasets again. This should quietly do 

1353 # nothing because of internal deduplication, and it shouldn't 

1354 # complain about being asked to export the "htm7" elements even 

1355 # though there aren't any in these datasets or in the database. 

1356 export.saveDatasets(datasets, elements=["htm7"]) 

1357 # Save one of the data IDs again; this should be harmless 

1358 # because of internal deduplication. 

1359 export.saveDataIds([datasets[0].dataId]) 

1360 # Save some dimension records directly. 

1361 export.saveDimensionData("skymap", [skymapRecord]) 

1362 self.assertTrue(os.path.exists(exportFile)) 

1363 with safeTestTempDir(TESTDIR) as importDir: 

1364 # We always want this to be a local posix butler 

1365 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml"))) 

1366 # Calling script.butlerImport tests the implementation of the 

1367 # butler command line interface "import" subcommand. Functions 

1368 # in the script folder are generally considered protected and 

1369 # should not be used as a public API. 

1370 with open(exportFile) as f: 

1371 script.butlerImport( 

1372 importDir, 

1373 export_file=f, 

1374 directory=exportDir, 

1375 transfer="auto", 

1376 skip_dimensions=None, 

1377 ) 

1378 importButler = Butler(importDir, run=self.default_run) 

1379 for ref in datasets: 

1380 with self.subTest(ref=ref): 

1381 # Test for existence by passing in the DatasetType and 

1382 # data ID separately, to avoid lookup by dataset_id. 

1383 self.assertTrue(importButler.exists(ref.datasetType, ref.dataId)) 

1384 self.assertEqual( 

1385 list(importButler.registry.queryDimensionRecords("skymap")), 

1386 [importButler.dimensions["skymap"].RecordClass(**skymapRecord)], 

1387 ) 
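
# The round trip above, reduced to its core calls (a sketch; "src" is a
# writeable Butler and "dst_root" a fresh repo created with makeRepo):
#
#     refs = list(src.registry.queryDatasets(..., collections=...))
#     with src.export(filename="exports.yaml", directory="export_dir",
#                     transfer="auto") as export:
#         export.saveDatasets(refs)
#     dst = Butler(dst_root, writeable=True)
#     dst.import_(filename="exports.yaml", directory="export_dir",
#                 transfer="auto")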

1388 

1389 def testRemoveRuns(self) -> None: 

1390 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1391 butler = Butler(self.tmpConfigFile, writeable=True) 

1392 # Load registry data with dimensions to hang datasets off of. 

1393 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1394 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1395 # Add some RUN-type collections. 

1396 run1 = "run1" 

1397 butler.registry.registerRun(run1) 

1398 run2 = "run2" 

1399 butler.registry.registerRun(run2) 

1400 # Put a dataset in each run. 

1401 metric = makeExampleMetrics() 

1402 dimensions = butler.dimensions.extract(["instrument", "physical_filter"]) 

1403 datasetType = self.addDatasetType( 

1404 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1405 ) 

1406 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1407 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1408 uri1 = butler.getURI(ref1) 

1409 uri2 = butler.getURI(ref2) 

1410 

1411 with self.assertRaises(OrphanedRecordError): 

1412 butler.registry.removeDatasetType(datasetType.name) 

1413 

1414 # Remove from both runs with different values for unstore. 

1415 butler.removeRuns([run1], unstore=True) 

1416 butler.removeRuns([run2], unstore=False) 

1417 # There should be nothing in the registry for either one, and the 

1418 # datastore should not think either exists. 

1419 with self.assertRaises(MissingCollectionError): 

1420 butler.registry.getCollectionType(run1) 

1421 with self.assertRaises(MissingCollectionError): 

1422 butler.registry.getCollectionType(run2) 

1423 self.assertFalse(butler.stored(ref1)) 

1424 self.assertFalse(butler.stored(ref2)) 

1425 # The ref we unstored should be gone according to the URI, but the 

1426 # one we forgot should still be around. 

1427 self.assertFalse(uri1.exists()) 

1428 self.assertTrue(uri2.exists()) 

1429 

1430 # Now that the collections have been pruned we can remove the 

1431 # dataset type 

1432 butler.registry.removeDatasetType(datasetType.name) 

1433 

1434 with self.assertLogs("lsst.daf.butler.registries", "INFO") as cm: 

1435 butler.registry.removeDatasetType(("test*", "test*")) 

1436 self.assertIn("not defined", "\n".join(cm.output)) 
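
# To summarise the semantics exercised above: unstore=True removes the
# file artifacts as well as the registry entries, while unstore=False
# only forgets the datasets and leaves the artifacts on disk. A sketch
# with hypothetical run names:
#
#     butler.removeRuns(["runA"], unstore=True)   # registry + artifacts
#     butler.removeRuns(["runB"], unstore=False)  # registry only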

1437 

1438 

1439class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1440 """PosixDatastore specialization of a butler""" 

1441 

1442 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1443 fullConfigKey: str | None = ".datastore.formatters" 

1444 validationCanFail = True 

1445 datastoreStr = ["/tmp"] 

1446 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1447 registryStr = "/gen3.sqlite3" 

1448 

1449 def testPathConstructor(self) -> None: 

1450 """Independent test of constructor using PathLike.""" 

1451 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1452 self.assertIsInstance(butler, Butler) 

1453 

1454 # And again with a Path object with the butler yaml 

1455 path = pathlib.Path(self.tmpConfigFile) 

1456 butler = Butler(path, writeable=False) 

1457 self.assertIsInstance(butler, Butler) 

1458 

1459 # And again with a Path object without the butler yaml 

1460 # (making sure we skip it if the tmp config doesn't end 

1461 # in butler.yaml -- which is the case for a subclass) 

1462 if self.tmpConfigFile.endswith("butler.yaml"): 

1463 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1464 butler = Butler(path, writeable=False) 

1465 self.assertIsInstance(butler, Butler) 

1466 

1467 def testExportTransferCopy(self) -> None: 

1468 """Test local export using all transfer modes""" 

1469 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1470 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1471 # Test that the repo actually has at least one dataset. 

1472 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1473 self.assertGreater(len(datasets), 0) 

1474 uris = [exportButler.getURI(d) for d in datasets] 

1475 assert isinstance(exportButler._datastore, FileDatastore) 

1476 datastoreRoot = exportButler.get_datastore_roots()[exportButler.get_datastore_names()[0]] 

1477 

1478 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1479 

1480 for path in pathsInStore: 

1481 # Assume local file system 

1482 assert path is not None 

1483 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1484 

1485 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1486 with safeTestTempDir(TESTDIR) as exportDir: 

1487 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1488 export.saveDatasets(datasets) 

1489 for path in pathsInStore: 

1490 assert path is not None 

1491 self.assertTrue( 

1492 self.checkFileExists(exportDir, path), 

1493 f"Check that mode {transfer} exported files", 

1494 ) 

1495 

1496 def testPruneDatasets(self) -> None: 

1497 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1498 butler = Butler(self.tmpConfigFile, writeable=True) 

1499 assert isinstance(butler._datastore, FileDatastore) 

1500 # Load registry data with dimensions to hang datasets off of. 

1501 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1502 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1503 # Add some RUN-type collections. 

1504 run1 = "run1" 

1505 butler.registry.registerRun(run1) 

1506 run2 = "run2" 

1507 butler.registry.registerRun(run2) 

1508 # Put some datasets. ref1 and ref2 have the same data ID and are in 

1509 # different runs; ref3 has a different data ID. 

1510 metric = makeExampleMetrics() 

1511 dimensions = butler.dimensions.extract(["instrument", "physical_filter"]) 

1512 datasetType = self.addDatasetType( 

1513 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1514 ) 

1515 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1516 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1517 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1518 

1519 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1520 for ref, stored in many_stored.items(): 

1521 self.assertTrue(stored, f"Ref {ref} should be stored") 

1522 

1523 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1524 for ref, exists in many_exists.items(): 

1525 self.assertTrue(exists, f"Checking ref {ref} exists.") 

1526 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored") 

1527 

1528 # Simple prune. 

1529 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1530 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1)) 

1531 

1532 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1533 for ref, stored in many_stored.items(): 

1534 self.assertFalse(stored, f"Ref {ref} should not be stored") 

1535 

1536 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1537 for ref, exists in many_exists.items(): 

1538 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored") 

1539 

1540 # Put data back. 

1541 ref1_new = butler.put(metric, ref1) 

1542 self.assertEqual(ref1_new, ref1) # Reuses original ID. 

1543 ref2 = butler.put(metric, ref2) 

1544 

1545 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1546 self.assertTrue(many_stored[ref1]) 

1547 self.assertTrue(many_stored[ref2]) 

1548 self.assertFalse(many_stored[ref3]) 

1549 

1550 ref3 = butler.put(metric, ref3) 

1551 

1552 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1553 for ref, exists in many_exists.items(): 

1554 self.assertTrue(exists, f"Ref {ref} should be stored") 

1555 

1556 # Clear out the datasets from registry and start again. 

1557 refs = [ref1, ref2, ref3] 

1558 butler.pruneDatasets(refs, purge=True, unstore=True) 

1559 for ref in refs: 

1560 butler.put(metric, ref) 

1561 

1562 # Confirm we can retrieve deferred. 

1563 dref1 = butler.getDeferred(ref1) # known and exists 

1564 metric1 = dref1.get() 

1565 self.assertEqual(metric1, metric) 

1566 

1567 # Test different forms of file availability. 

1568 # Need to be in a state where: 

1569 # - one ref just has registry record. 

1570 # - one ref has a missing file but a datastore record. 

1571 # - one ref has a missing datastore record but file is there. 

1572 # - one ref does not exist anywhere. 

1573 # Do not need to test a ref that has everything since that is tested 

1574 # above. 

1575 ref0 = DatasetRef( 

1576 datasetType, 

1577 DataCoordinate.standardize( 

1578 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions 

1579 ), 

1580 run=run1, 

1581 ) 

1582 

1583 # Delete from datastore and retain in Registry. 

1584 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False) 

1585 

1586 # File has been removed. 

1587 uri2 = butler.getURI(ref2) 

1588 uri2.remove() 

1589 

1590 # Datastore has lost track. 

1591 butler._datastore.forget([ref3]) 

1592 

1593 # First test with a standard butler. 

1594 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1595 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1596 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1597 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1598 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED) 

1599 

1600 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False) 

1601 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1602 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1603 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN) 

1604 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1605 self.assertTrue(exists_many[ref2]) 

1606 

1607 # Check that per-ref query gives the same answer as many query. 

1608 for ref, exists in exists_many.items(): 

1609 self.assertEqual(butler.exists(ref, full_check=False), exists) 
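
# DatasetExistence is a flag enum, so the states asserted above compose
# from RECORDED (registry entry), DATASTORE (datastore record),
# _ARTIFACT (artifact verified) and _ASSUMED (artifact presence
# assumed). A sketch, assuming the flag definitions in lsst.daf.butler:
#
#     assert DatasetExistence.KNOWN == (
#         DatasetExistence.RECORDED
#         | DatasetExistence.DATASTORE
#         | DatasetExistence._ASSUMED
#     )
#     assert DatasetExistence.VERIFIED == (
#         DatasetExistence.RECORDED
#         | DatasetExistence.DATASTORE
#         | DatasetExistence._ARTIFACT
#     )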

1610 

1611 # Get deferred checks for existence before it allows it to be 

1612 # retrieved. 

1613 with self.assertRaises(LookupError): 

1614 butler.getDeferred(ref3) # not known, file exists 

1615 dref2 = butler.getDeferred(ref2) # known but file missing 

1616 with self.assertRaises(FileNotFoundError): 

1617 dref2.get() 

1618 

1619 # Test again with a trusting butler. 

1620 butler._datastore.trustGetRequest = True 

1621 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1622 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1623 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1624 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1625 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT) 

1626 

1627 # When trusting we can get a deferred dataset handle that is not 

1628 # known but does exist. 

1629 dref3 = butler.getDeferred(ref3) 

1630 metric3 = dref3.get() 

1631 self.assertEqual(metric3, metric) 

1632 

1633 # Check that per-ref query gives the same answer as many query. 

1634 for ref, exists in exists_many.items(): 

1635 self.assertEqual(butler.exists(ref, full_check=True), exists) 

1636 

1637 # Create a ref that surprisingly has the UUID of an existing ref 

1638 # but is not the same. 

1639 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id) 

1640 with self.assertRaises(ValueError): 

1641 butler.exists(ref_bad) 

1642 

1643 # Create a ref that has a compatible storage class. 

1644 ref_compat = ref2.overrideStorageClass("StructuredDataDict") 

1645 exists = butler.exists(ref_compat) 

1646 self.assertEqual(exists, exists_many[ref2]) 

1647 

1648 # Remove everything and start from scratch. 

1649 butler._datastore.trustGetRequest = False 

1650 butler.pruneDatasets(refs, purge=True, unstore=True) 

1651 for ref in refs: 

1652 butler.put(metric, ref) 

1653 

1654 # These tests mess directly with the trash table and can leave the 

1655 # datastore in an odd state. Do them at the end. 

1656 # Check that in normal mode, deleting the record will lead to 

1657 # trash not touching the file. 

1658 uri1 = butler.getURI(ref1) 

1659 butler._datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1660 butler._datastore.forget([ref1]) 

1661 butler._datastore.trash(ref1) 

1662 butler._datastore.emptyTrash() 

1663 self.assertTrue(uri1.exists()) 

1664 uri1.remove() # Clean it up. 

1665 

1666 # Simulate execution butler setup by deleting the datastore 

1667 # record but keeping the file around and trusting. 

1668 butler._datastore.trustGetRequest = True 

1669 uris = butler.get_many_uris([ref2, ref3]) 

1670 uri2 = uris[ref2].primaryURI 

1671 uri3 = uris[ref3].primaryURI 

1672 self.assertTrue(uri2.exists()) 

1673 self.assertTrue(uri3.exists()) 

1674 

1675 # Remove the datastore record. 

1676 butler._datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1677 butler._datastore.forget([ref2]) 

1678 self.assertTrue(uri2.exists()) 

1679 butler._datastore.trash([ref2, ref3]) 

1680 # Immediate removal for ref2 file 

1681 self.assertFalse(uri2.exists()) 

1682 # But ref3 has to wait for the empty. 

1683 self.assertTrue(uri3.exists()) 

1684 butler._datastore.emptyTrash() 

1685 self.assertFalse(uri3.exists()) 
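
# To summarise: with trust enabled, trash() removes an artifact
# immediately when the datastore no longer has a record for the ref
# (ref2 here), while refs that still have records (ref3) are only
# removed when emptyTrash() runs.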

1686 

1687 # Clear out the datasets from registry. 

1688 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1689 

1690 def testPytypeCoercion(self) -> None: 

1691 """Test python type coercion on Butler.get and put.""" 

1692 # Store some data with the normal example storage class. 

1693 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1694 datasetTypeName = "test_metric" 

1695 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1696 

1697 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1698 metric = butler.get(datasetTypeName, dataId=dataId) 

1699 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1700 

1701 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1702 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1703 

1704 # Now need to hack the registry dataset type definition. 

1705 # There is no API for this. 

1706 assert isinstance(butler._registry, SqlRegistry) 

1707 manager = butler._registry._managers.datasets 

1708 assert hasattr(manager, "_db") and hasattr(manager, "_static") 

1709 manager._db.update( 

1710 manager._static.dataset_type, 

1711 {"name": datasetTypeName}, 

1712 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1713 ) 

1714 

1715 # Force reset of dataset type cache 

1716 butler.registry.refresh() 

1717 

1718 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1719 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1720 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1721 

1722 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1723 self.assertNotEqual(type(metric_model), type(metric)) 

1724 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1725 

1726 # Put the model and read it back to show that everything now 

1727 # works as normal. 

1728 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1729 metric_model_new = butler.get(metric_ref) 

1730 self.assertEqual(metric_model_new, metric_model) 

1731 

1732 # Hack the storage class again to something that will fail on the 

1733 # get with no conversion class. 

1734 manager._db.update( 

1735 manager._static.dataset_type, 

1736 {"name": datasetTypeName}, 

1737 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1738 ) 

1739 butler.registry.refresh() 

1740 

1741 with self.assertRaises(ValueError): 

1742 butler.get(datasetTypeName, dataId=dataId) 
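
# The coercion above is driven by the registry definition of the
# dataset type; a caller can also request a conversion explicitly (a
# sketch, assuming the two storage classes define a converter):
#
#     metric_model = butler.get(
#         datasetTypeName,
#         dataId=dataId,
#         storageClass="StructuredDataNoComponentsModel",
#     )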

1743 

1744 

1745@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1746class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1747 """PosixDatastore specialization of a butler using Postgres""" 

1748 

1749 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1750 fullConfigKey = ".datastore.formatters" 

1751 validationCanFail = True 

1752 datastoreStr = ["/tmp"] 

1753 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1754 registryStr = "PostgreSQL@test" 

1755 postgresql: Any 

1756 

1757 @staticmethod 

1758 def _handler(postgresql: Any) -> None: 

1759 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1760 with engine.begin() as connection: 

1761 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1762 

1763 @classmethod 

1764 def setUpClass(cls) -> None: 

1765 # Create the postgres test server. 

1766 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1767 cache_initialized_db=True, on_initialized=cls._handler 

1768 ) 

1769 super().setUpClass() 

1770 

1771 @classmethod 

1772 def tearDownClass(cls) -> None: 

1773 # Clean up any lingering SQLAlchemy engines/connections 

1774 # so they're closed before we shut down the server. 

1775 gc.collect() 

1776 cls.postgresql.clear_cache() 

1777 super().tearDownClass() 

1778 

1779 def setUp(self) -> None: 

1780 self.server = self.postgresql() 

1781 

1782 # Need to add a registry section to the config. 

1783 self._temp_config = False 

1784 config = Config(self.configFile) 

1785 config["registry", "db"] = self.server.url() 

1786 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1787 config.dump(fh) 

1788 self.configFile = fh.name 

1789 self._temp_config = True 

1790 super().setUp() 

1791 

1792 def tearDown(self) -> None: 

1793 self.server.stop() 

1794 if self._temp_config and os.path.exists(self.configFile): 

1795 os.remove(self.configFile) 

1796 super().tearDown() 

1797 

1798 def testMakeRepo(self) -> None: 

1799 # The base class test assumes that it is using sqlite and that 

1800 # the config file is acceptable to sqlite. 

1801 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1802 

1803 

1804class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1805 """InMemoryDatastore specialization of a butler""" 

1806 

1807 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1808 fullConfigKey = None 

1809 useTempRoot = False 

1810 validationCanFail = False 

1811 datastoreStr = ["datastore='InMemory"] 

1812 datastoreName = ["InMemoryDatastore@"] 

1813 registryStr = "/gen3.sqlite3" 

1814 

1815 def testIngest(self) -> None: 
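# File ingest does not apply to the in-memory datastore, so the
# inherited test is disabled by overriding it with a no-op.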

1816 pass 

1817 

1818 

1819class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1820 """PosixDatastore specialization""" 

1821 

1822 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1823 fullConfigKey = ".datastore.datastores.1.formatters" 

1824 validationCanFail = True 

1825 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1826 datastoreName = [ 

1827 "InMemoryDatastore@", 

1828 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1829 "SecondDatastore", 

1830 ] 

1831 registryStr = "/gen3.sqlite3" 

1832 

1833 

1834class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1835 """Test that a yaml file in one location can refer to a root in another.""" 

1836 

1837 datastoreStr = ["dir1"] 

1838 # Disable the makeRepo test since we are deliberately not using 

1839 # butler.yaml as the config name. 

1840 fullConfigKey = None 

1841 

1842 def setUp(self) -> None: 

1843 self.root = makeTestTempDir(TESTDIR) 

1844 

1845 # Make a new repository in one place 

1846 self.dir1 = os.path.join(self.root, "dir1") 

1847 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1848 

1849 # Move the yaml file to a different place and add a "root" 

1850 self.dir2 = os.path.join(self.root, "dir2") 

1851 os.makedirs(self.dir2, exist_ok=True) 

1852 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1853 config = Config(configFile1) 

1854 config["root"] = self.dir1 

1855 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1856 config.dumpToUri(configFile2) 

1857 os.remove(configFile1) 

1858 self.tmpConfigFile = configFile2 

1859 

1860 def testFileLocations(self) -> None: 

1861 self.assertNotEqual(self.dir1, self.dir2) 

1862 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1863 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1864 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1865 

1866 

1867class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1868 """Test that a config file created by makeRepo outside of repo works.""" 

1869 

1870 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1871 

1872 def setUp(self) -> None: 

1873 self.root = makeTestTempDir(TESTDIR) 

1874 self.root2 = makeTestTempDir(TESTDIR) 

1875 

1876 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1877 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1878 

1879 def tearDown(self) -> None: 

1880 if os.path.exists(self.root2): 

1881 shutil.rmtree(self.root2, ignore_errors=True) 

1882 super().tearDown() 

1883 

1884 def testConfigExistence(self) -> None: 

1885 c = Config(self.tmpConfigFile) 

1886 uri_config = ResourcePath(c["root"]) 

1887 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1888 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1889 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1890 

1891 def testPutGet(self) -> None: 

1892 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1893 self.runPutGetTest(storageClass, "test_metric") 

1894 

1895 

1896class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1897 """Test that a config file created by makeRepo outside of repo works.""" 

1898 

1899 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1900 

1901 def setUp(self) -> None: 

1902 self.root = makeTestTempDir(TESTDIR) 

1903 self.root2 = makeTestTempDir(TESTDIR) 

1904 

1905 self.tmpConfigFile = self.root2 

1906 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1907 

1908 def testConfigExistence(self) -> None: 

1909 # Append the yaml file name, else the Config constructor does not 

1910 # know the file type. 

1911 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1912 super().testConfigExistence() 

1913 

1914 

1915class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1916 """Test that a config file created by makeRepo outside of repo works.""" 

1917 

1918 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1919 

1920 def setUp(self) -> None: 

1921 self.root = makeTestTempDir(TESTDIR) 

1922 self.root2 = makeTestTempDir(TESTDIR) 

1923 

1924 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1925 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1926 

1927 

1928@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1929class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1930 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1931 a local in-memory SqlRegistry. 

1932 """ 

1933 

1934 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1935 fullConfigKey = None 

1936 validationCanFail = True 

1937 

1938 bucketName = "anybucketname" 

1939 """Name of the Bucket that will be used in the tests. The name is read from 

1940 the config file used with the tests during set-up. 

1941 """ 

1942 

1943 root = "butlerRoot/" 

1944 """Root repository directory expected to be used in case useTempRoot=False. 

1945 Otherwise the root is set to a 20 characters long randomly generated string 

1946 during set-up. 

1947 """ 

1948 

1949 datastoreStr = [f"datastore={root}"] 

1950 """Contains all expected root locations in a format expected to be 

1951 returned by Butler stringification. 

1952 """ 

1953 

1954 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1955 """The expected format of the S3 Datastore string.""" 

1956 

1957 registryStr = "/gen3.sqlite3" 

1958 """Expected format of the Registry string.""" 

1959 

1960 mock_s3 = mock_s3() 

1961 """The mocked s3 interface from moto.""" 

1962 

1963 def genRoot(self) -> str: 

1964 """Return a random string of len 20 to serve as a root 

1965 name for the temporary bucket repo. 

1966 

1967 This is equivalent to tempfile.mkdtemp as this is what self.root 

1968 becomes when useTempRoot is True. 

1969 """ 

1970 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1971 return rndstr + "/" 

1972 

1973 def setUp(self) -> None: 

1974 config = Config(self.configFile) 

1975 uri = ResourcePath(config[".datastore.datastore.root"]) 

1976 self.bucketName = uri.netloc 

1977 

1978 # Enable S3 mocking of tests. 

1979 self.mock_s3.start() 

1980 

1982 # Set up some fake credentials if they do not exist. 

1982 self.usingDummyCredentials = setAwsEnvCredentials() 

1983 

1984 if self.useTempRoot: 

1985 self.root = self.genRoot() 

1986 rooturi = f"s3://{self.bucketName}/{self.root}" 

1987 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1988 

1990 # Need a local folder to store the registry database. 

1990 self.reg_dir = makeTestTempDir(TESTDIR) 

1991 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1992 

1993 # Moto needs to know that we expect the bucket to exist 

1994 # (the name used to be the class attribute bucketName). 

1995 s3 = boto3.resource("s3") 

1996 s3.create_bucket(Bucket=self.bucketName) 

1997 

1998 self.datastoreStr = [f"datastore='{rooturi}'"] 

1999 self.datastoreName = [f"FileDatastore@{rooturi}"] 

2000 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

2001 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 
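
# Note that with the moto mock started, none of the boto3 calls above
# touch real AWS; the bucket and its objects exist only in the mocked
# backend for the lifetime of the test.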

2002 

2003 def tearDown(self) -> None: 

2004 s3 = boto3.resource("s3") 

2005 bucket = s3.Bucket(self.bucketName) 

2006 try: 

2007 bucket.objects.all().delete() 

2008 except botocore.exceptions.ClientError as e: 

2009 if e.response["Error"]["Code"] == "404": 

2010 # The key was not reachable; nothing to delete. 

2011 pass 

2012 else: 

2013 raise 

2014 

2015 bucket = s3.Bucket(self.bucketName) 

2016 bucket.delete() 

2017 

2018 # Stop the S3 mock. 

2019 self.mock_s3.stop() 

2020 

2021 # unset any potentially set dummy credentials 

2022 if self.usingDummyCredentials: 

2023 unsetAwsEnvCredentials() 

2024 

2025 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

2026 shutil.rmtree(self.reg_dir, ignore_errors=True) 

2027 

2028 if self.useTempRoot and os.path.exists(self.root): 

2029 shutil.rmtree(self.root, ignore_errors=True) 

2030 

2031 super().tearDown() 

2032 

2033 

2034class PosixDatastoreTransfers(unittest.TestCase): 

2035 """Test data transfers between butlers. 

2036 

2037 Test for different managers. UUID to UUID and integer to integer are 

2038 tested. UUID to integer is not supported since we do not currently 

2039 want to allow that. Integer to UUID is supported with the caveat 

2040 that UUID4 will be generated and this will be incorrect for raw 

2041 dataset types. The test ignores that. 

2042 """ 

2043 

2044 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2045 storageClassFactory: StorageClassFactory 

2046 

2047 @classmethod 

2048 def setUpClass(cls) -> None: 

2049 cls.storageClassFactory = StorageClassFactory() 

2050 cls.storageClassFactory.addFromConfig(cls.configFile) 

2051 

2052 def setUp(self) -> None: 

2053 self.root = makeTestTempDir(TESTDIR) 

2054 self.config = Config(self.configFile) 

2055 

2056 def tearDown(self) -> None: 

2057 removeTestTempDir(self.root) 

2058 

2059 def create_butler(self, manager: str, label: str) -> Butler: 

2060 config = Config(self.configFile) 

2061 config["registry", "managers", "datasets"] = manager 

2062 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

2063 

2064 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None: 

2065 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID" 

2066 if manager1 is None: 

2067 manager1 = default 

2068 if manager2 is None: 

2069 manager2 = default 

2070 self.source_butler = self.create_butler(manager1, "1") 

2071 self.target_butler = self.create_butler(manager2, "2") 

2072 

2073 def testTransferUuidToUuid(self) -> None: 

2074 self.create_butlers() 

2075 self.assertButlerTransfers() 

2076 

2077 def _enable_trust(self, datastore: Datastore) -> None: 

2078 datastores = getattr(datastore, "datastores", [datastore]) 

2079 for this_datastore in datastores: 

2080 if hasattr(this_datastore, "trustGetRequest"): 

2081 this_datastore.trustGetRequest = True 

2082 

2083 def testTransferMissing(self) -> None: 

2084 """Test transfers where datastore records are missing. 

2085 

2086 This is how execution butler works. 

2087 """ 

2088 self.create_butlers() 

2089 

2090 # Configure the source butler to allow trust. 

2091 self._enable_trust(self.source_butler._datastore) 

2092 

2093 self.assertButlerTransfers(purge=True) 

2094 

2095 def testTransferMissingDisassembly(self) -> None: 

2096 """Test transfers where datastore records are missing. 

2097 

2098 This is how execution butler works. 

2099 """ 

2100 self.create_butlers() 

2101 

2102 # Configure the source butler to allow trust. 

2103 self._enable_trust(self.source_butler._datastore) 

2104 

2105 # Test disassembly. 

2106 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2107 

2108 def testAbsoluteURITransferDirect(self) -> None: 

2109 """Test transfer using an absolute URI.""" 

2110 self._absolute_transfer("auto") 

2111 

2112 def testAbsoluteURITransferCopy(self) -> None: 

2113 """Test transfer using an absolute URI.""" 

2114 self._absolute_transfer("copy") 

2115 

2116 def _absolute_transfer(self, transfer: str) -> None: 

2117 self.create_butlers() 

2118 

2119 storageClassName = "StructuredData" 

2120 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2121 datasetTypeName = "random_data" 

2122 run = "run1" 

2123 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2124 

2125 dimensions = self.source_butler.dimensions.extract(()) 

2126 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2127 self.source_butler.registry.registerDatasetType(datasetType) 

2128 

2129 metrics = makeExampleMetrics() 

2130 with ResourcePath.temporary_uri(suffix=".json") as temp: 

2131 dataId = DataCoordinate.makeEmpty(self.source_butler.dimensions) 

2132 source_refs = [DatasetRef(datasetType, dataId, run=run)] 

2133 temp.write(json.dumps(metrics.exportAsDict()).encode()) 

2134 dataset = FileDataset(path=temp, refs=source_refs) 

2135 self.source_butler.ingest(dataset, transfer="direct") 

2136 

2137 self.target_butler.transfer_from( 

2138 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer 

2139 ) 

2140 

2141 uri = self.target_butler.getURI(dataset.refs[0]) 

2142 if transfer == "auto": 

2143 self.assertEqual(uri, temp) 

2144 else: 

2145 self.assertNotEqual(uri, temp) 
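
# Because the ingest used transfer="direct", the source datastore
# records the absolute URI without copying the file; transfer="auto"
# therefore preserves that URI in the target, while "copy" rehomes the
# artifact into the target datastore.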

2146 

2147 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None: 

2148 """Test that a run can be transferred to another butler.""" 

2149 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2150 datasetTypeName = "random_data" 

2151 

2152 # The test will create 3 collections and we will want to transfer 

2153 # two of those three. 

2154 runs = ["run1", "run2", "other"] 

2155 

2156 # Also want to use two different dataset types to ensure that 

2157 # grouping works. 

2158 datasetTypeNames = ["random_data", "random_data_2"] 

2159 

2160 # Create the run collections in the source butler. 

2161 for run in runs: 

2162 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2163 

2164 # Create dimensions in source butler. 

2165 n_exposures = 30 

2166 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2167 self.source_butler.registry.insertDimensionData( 

2168 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2169 ) 

2170 self.source_butler.registry.insertDimensionData( 

2171 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2172 ) 

2173 

2174 for i in range(n_exposures): 

2175 self.source_butler.registry.insertDimensionData( 

2176 "exposure", 

2177 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2178 ) 

2179 

2180 # Create dataset types in the source butler. 

2181 dimensions = self.source_butler.dimensions.extract(["instrument", "exposure"]) 

2182 for datasetTypeName in datasetTypeNames: 

2183 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2184 self.source_butler.registry.registerDatasetType(datasetType) 

2185 

2186 # Write a dataset to an unrelated run -- this will ensure that 

2187 # we are rewriting integer dataset ids in the target if necessary. 

2188 # Will not be relevant for UUID. 

2189 run = "distraction" 

2190 butler = Butler(butler=self.source_butler, run=run) 

2191 butler.put( 

2192 makeExampleMetrics(), 

2193 datasetTypeName, 

2194 exposure=1, 

2195 instrument="DummyCamComp", 

2196 physical_filter="d-r", 

2197 ) 

2198 

2199 # Write some example metrics to the source 

2200 butler = Butler(butler=self.source_butler) 

2201 

2202 # Set of DatasetRefs that should be in the list of refs to transfer 

2203 # but which will not be transferred. 

2204 deleted: set[DatasetRef] = set() 

2205 

2206 n_expected = 20 # Number of datasets expected to be transferred 

2207 source_refs = [] 

2208 for i in range(n_exposures): 

2209 # Put a third of the datasets into each collection; only retain 

2210 # two thirds. 

2211 index = i % 3 

2212 run = runs[index] 

2213 datasetTypeName = datasetTypeNames[i % 2] 

2214 

2215 metric = MetricsExample( 

2216 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)] 

2217 ) 

2218 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2219 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2220 

2221 # Remove the datastore record using low-level API, but only 

2222 # for a specific index. 

2223 if purge and index == 1: 

2224 # For one of these delete the file as well. 

2225 # This allows the "missing" code to filter the 

2226 # file out. 

2227 # Access the individual datastores. 

2228 datastores = [] 

2229 if hasattr(butler._datastore, "datastores"): 

2230 datastores.extend(butler._datastore.datastores) 

2231 else: 

2232 datastores.append(butler._datastore) 

2233 

2234 if not deleted: 

2235 # For a chained datastore we need to remove 

2236 # files in each chain. 

2237 for datastore in datastores: 

2238 # The file might not be known to the datastore 

2239 # if constraints are used. 

2240 try: 

2241 primary, uris = datastore.getURIs(ref) 

2242 except FileNotFoundError: 

2243 continue 

2244 if primary and primary.scheme != "mem": 

2245 primary.remove() 

2246 for uri in uris.values(): 

2247 if uri.scheme != "mem": 

2248 uri.remove() 

2249 n_expected -= 1 

2250 deleted.add(ref) 

2251 

2252 # Remove the datastore record. 

2253 for datastore in datastores: 

2254 if hasattr(datastore, "removeStoredItemInfo"): 

2255 datastore.removeStoredItemInfo(ref) 

2256 

2257 if index < 2: 

2258 source_refs.append(ref) 

2259 if ref not in deleted: 

2260 new_metric = butler.get(ref) 

2261 self.assertEqual(new_metric, metric) 

2262 

2263 # Create some bad dataset types to ensure we check for inconsistent 

2264 # definitions. 

2265 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2266 for datasetTypeName in datasetTypeNames: 

2267 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2268 self.target_butler.registry.registerDatasetType(datasetType) 

2269 with self.assertRaises(ConflictingDefinitionError) as cm: 

2270 self.target_butler.transfer_from(self.source_butler, source_refs) 

2271 self.assertIn("dataset type differs", str(cm.exception)) 

2272 

2273 # And remove the bad definitions. 

2274 for datasetTypeName in datasetTypeNames: 

2275 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2276 

2277 # Transfer without creating dataset types should fail. 

2278 with self.assertRaises(KeyError): 

2279 self.target_butler.transfer_from(self.source_butler, source_refs) 

2280 

2281 # Transfer without creating dimensions should fail. 

2282 with self.assertRaises(ConflictingDefinitionError) as cm: 

2283 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True) 

2284 self.assertIn("dimension", str(cm.exception)) 

2285 

2286 # The failed transfer above leaves the registry in an inconsistent 

2287 # state because the run is created but then rolled back without 

2288 # the collection cache being cleared. For now force a refresh. 

2289 # Can remove with DM-35498. 

2290 self.target_butler.registry.refresh() 

2291 

2292 # Now transfer them to the second butler, including dimensions. 

2293 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2294 transferred = self.target_butler.transfer_from( 

2295 self.source_butler, 

2296 source_refs, 

2297 register_dataset_types=True, 

2298 transfer_dimensions=True, 

2299 ) 

2300 self.assertEqual(len(transferred), n_expected) 

2301 log_output = ";".join(log_cm.output) 

2302 

2303 # A ChainedDatastore will use the in-memory datastore for mexists 

2304 # so we can not rely on the mexists log message. 

2305 self.assertIn("Number of datastore records found in source", log_output) 

2306 self.assertIn("Creating output run", log_output) 

2307 

2308 # Do the transfer twice to ensure that it will do nothing extra. 

2309 # Only do this if purge=True because it does not work for int 

2310 # dataset_id. 

2311 if purge: 

2312 # This should not need to register dataset types. 

2313 transferred = self.target_butler.transfer_from(self.source_butler, source_refs) 

2314 self.assertEqual(len(transferred), n_expected) 

2315 

2316 # Also do an explicit low-level transfer to trigger some 

2317 # edge cases. 

2318 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2319 self.target_butler._datastore.transfer_from(self.source_butler._datastore, source_refs) 

2320 log_output = ";".join(log_cm.output) 

2321 self.assertIn("no file artifacts exist", log_output) 

2322 

2323 with self.assertRaises((TypeError, AttributeError)): 

2324 self.target_butler._datastore.transfer_from(self.source_butler, source_refs) # type: ignore 

2325 

2326 with self.assertRaises(ValueError): 

2327 self.target_butler._datastore.transfer_from( 

2328 self.source_butler._datastore, source_refs, transfer="split" 

2329 ) 

2330 

2331 # Now try to get the same refs from the new butler. 

2332 for ref in source_refs: 

2333 if ref not in deleted: 

2334 new_metric = self.target_butler.get(ref) 

2335 old_metric = self.source_butler.get(ref) 

2336 self.assertEqual(new_metric, old_metric) 

2337 

2338 # Now prune run2 collection and create instead a CHAINED collection. 

2339 # This should block the transfer. 

2340 self.target_butler.removeRuns(["run2"], unstore=True) 

2341 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2342 with self.assertRaises(CollectionTypeError): 

2343 # Re-importing the run1 datasets can be problematic if they 

2344 # use integer IDs so filter those out. 

2345 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2346 self.target_butler.transfer_from(self.source_butler, to_transfer) 

2347 

2348 

2349class ChainedDatastoreTransfers(PosixDatastoreTransfers): 

2350 """Test transfers using a chained datastore.""" 

2351 

2352 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2353 

2354 

2355class NullDatastoreTestCase(unittest.TestCase): 

2356 """Test that we can fall back to a null datastore.""" 

2357 

2358 # Need a good config to create the repo. 

2359 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2360 storageClassFactory: StorageClassFactory 

2361 

2362 @classmethod 

2363 def setUpClass(cls) -> None: 

2364 cls.storageClassFactory = StorageClassFactory() 

2365 cls.storageClassFactory.addFromConfig(cls.configFile) 

2366 

2367 def setUp(self) -> None: 

2368 """Create a new butler root for each test.""" 

2369 self.root = makeTestTempDir(TESTDIR) 

2370 Butler.makeRepo(self.root, config=Config(self.configFile)) 

2371 

2372 def tearDown(self) -> None: 

2373 removeTestTempDir(self.root) 

2374 

2375 def test_fallback(self) -> None: 

2376 # Read the butler config and mess with the datastore section. 

2377 bad_config = Config(os.path.join(self.root, "butler.yaml")) 

2378 bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore" 

2379 

2380 with self.assertRaises(RuntimeError): 

2381 Butler(bad_config) 

2382 

2383 butler = Butler(bad_config, writeable=True, without_datastore=True) 

2384 self.assertIsInstance(butler._datastore, NullDatastore) 

2385 

2386 # Check that registry is working. 

2387 butler.registry.registerRun("MYRUN") 

2388 collections = butler.registry.queryCollections(...) 

2389 self.assertIn("MYRUN", set(collections)) 

2390 

2391 # Create a ref. 

2392 dimensions = butler.dimensions.extract([]) 

2393 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

2394 datasetTypeName = "metric" 

2395 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2396 butler.registry.registerDatasetType(datasetType) 

2397 ref = DatasetRef(datasetType, {}, run="MYRUN") 

2398 

2399 # Check that datastore will complain. 

2400 with self.assertRaises(FileNotFoundError): 

2401 butler.get(ref) 

2402 with self.assertRaises(FileNotFoundError): 

2403 butler.getURI(ref) 

2404 

2405 

2406def setup_module(module: types.ModuleType) -> None: 

2407 """Set up the module for pytest.""" 

2408 clean_environment() 

2409 

2410 

2411if __name__ == "__main__": 

2412 clean_environment() 

2413 unittest.main()