Coverage for tests/test_butler.py: 13% (1295 statements)
coverage.py v7.2.7, created at 2023-08-12 09:20 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""
from __future__ import annotations

import gc
import json
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock  # needed for unittest.mock.patch.dict below
import uuid
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, cast

try:
    import boto3
    import botocore
    from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
    from moto import mock_s3  # type: ignore[import]
except ImportError:
    boto3 = None

    def mock_s3(*args: Any, **kwargs: Any) -> Any:  # type: ignore[no-untyped-def]
        """No-op decorator for when moto's mock_s3 cannot be imported."""
        return None
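
# A sketch (not part of the original file) of how the ``boto3 = None`` sentinel
# above is meant to be consumed by the S3-backed test cases; the class name
# here is hypothetical:
#
# @unittest.skipIf(boto3 is None, "boto3/moto not installed")
# class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
#     ...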


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    ButlerRepoIndex,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetExistence,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    NullDatastore,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    import types

    from lsst.daf.butler import Datastore, DimensionGraph, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def clean_environment() -> None:
    """Remove external environment variables that affect the tests."""
    for k in (
        "DAF_BUTLER_REPOSITORY_INDEX",
        "S3_ENDPOINT_URL",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_SHARED_CREDENTIALS_FILE",
    ):
        os.environ.pop(k, None)
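
# Where this helper is invoked falls outside this excerpt; the usual wiring
# (an assumption, shown for context) would be a module-level unittest hook:
#
# def setUpModule() -> None:
#     clean_environment()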


def makeExampleMetrics() -> MetricsExample:
    """Return an example dataset suitable for tests."""
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, used to prevent the
    misdiagnosis that might otherwise occur when a standard exception is
    raised.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in any other test
    cases.
    """

    def testSearchPath(self) -> None:
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")
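
# For context: the override directory used above supplies a datastore config
# fragment that, judging by the assertions, amounts to something like the
# following (a sketch inferred from the test, not copied from config/testConfigs):
#
# datastore:
#   records:
#     table: override_record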


class ButlerPutGetTests(TestCaseMixin):
    """Helper class for running a suite of put/get tests against different
    butler configurations.
    """

    root: str
    default_run = "ingésτ😺"
    storageClassFactory: StorageClassFactory
    configFile: str
    tmpConfigFile: str

    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGraph, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(
        self,
        butler: Butler,
        datasetRef: DatasetRef,
        components: tuple[str, ...],
        reference: Any,
        collections: Any = None,
    ) -> None:
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[Butler, DatasetType]:
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run})

        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType
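
    # In miniature, the round trip this helper enables (an illustration, not
    # an extra test; the names are placeholders):
    #
    # butler, datasetType = self.create_butler("some_run", "StructuredData", "metric")
    # ref = butler.put(makeExampleMetrics(), datasetType,
    #                  {"instrument": "DummyCamComp", "visit": 423})
    # self.assertEqual(butler.get(ref), makeExampleMetrics())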

    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Butler:
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType.

        # Keep track of any collections we add and do not clean up.
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]
        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                kwargs: dict[str, Any] = {}
                if not isinstance(args[0], DatasetRef):  # type: ignore
                    kwargs["run"] = this_run
                ref = butler.put(metric, *args, **kwargs)
                self.assertIsInstance(ref, DatasetRef)

                # Test get with a resolved ref
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                # Can the artifacts themselves be retrieved?
                if not butler._datastore.isEphemeral:
                    root_uri = ResourcePath(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)
                            assert path_in_destination is not None

                            # When the path is not preserved there should not
                            # be any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                kwargs = {"collections": this_run}
                if isinstance(args[0], DatasetRef):
                    kwargs = {}  # Prevent warning from being issued.
                self.assertFalse(butler.exists(*args, **kwargs))
                # get() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know they are empty.
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Create DatasetRef for put using default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Check that getDeferred fails with standalone ref.
        with self.assertRaises(LookupError):
            butler.getDeferred(refIn)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        assert metric.data is not None  # for mypy
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a DatasetType that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises(FileNotFoundError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add the same ref again, so we can check that a duplicate put fails.
        ref = butler.put(metric, datasetType, dataId)

        # Repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with a resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # In the case of a resolved ref this write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present.

        return butler

    def testDeferredCollectionPassing(self) -> None:
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # A second registration will be allowed but indicates a no-op.
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both directly and via
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises a
        # CollectionError.
        self.assertFalse(butler.exists(datasetType, dataId))
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True
    validationCanFail: bool
    fullConfigKey: str | None
    registryStr: str | None
    datastoreName: list[str] | None
    datastoreStr: list[str]

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self) -> None:
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in the run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler._datastore, butler2._datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"file://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"):
                        Butler("bad_label")
                    with self.assertRaises(FileNotFoundError):
                        # Should ignore aliases.
                        Butler(ResourcePath("label", forceAbsolute=False))
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(
                        Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False)
                    )
                    self.assertIn("not known to", str(cm.exception))
                    # Should report no failure.
                    self.assertEqual(ButlerRepoIndex.get_failure_reason(), "")
        with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
            # Now with an empty configuration.
            butler_index = Config()
            butler_index.dumpToUri(temp_file)
            with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"):
                    Butler("label")
        with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
            # Now with bad contents.
            with open(temp_file.ospath, "w") as fh:
                print("'", file=fh)
            with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"):
                    Butler("label")
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())

            with self.assertRaisesRegex(FileNotFoundError, "index file not found"):
                Butler("label")

        # Check that we can create a Butler when the alias file is not found.
        butler = Butler(self.tmpConfigFile, writeable=False)
        self.assertIsInstance(butler, Butler)
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())
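
    # For reference, the repository index pointed at by
    # DAF_BUTLER_REPOSITORY_INDEX above is just a label-to-URI mapping; the
    # YAML variant written by this test looks roughly like this (the paths are
    # whatever the test generated):
    #
    # label: /tmp/.../butler.yaml
    # bad_label: file://bucket/not_real.yaml
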

    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler._datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler._datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")
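
    # The getURIs contract exercised above, in brief: an assembled dataset has
    # a primary URI and no component URIs, while a disassembled composite has
    # no primary URI and one URI per component, i.e. (sketch):
    #
    # primary, components = butler.getURIs(ref)
    # assert (primary is None) == bool(components)
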

    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the compatible-but-different dataset type
        # definition, this time passing the DatasetType and data ID rather
        # than a resolved ref. This should be consistent with the DatasetRef
        # behavior above and return the type of the supplied DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single-file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            # For the first ingest use copy.
            butler.ingest(*datasets, transfer="copy", record_validation_info=False)

            # Now try to ingest again in "execution butler" mode where
            # the registry entries exist but the datastore does not have
            # the files. We also need to strip the dimension records to ensure
            # that they will be re-added by the ingest.
            ref = datasets[0].refs[0]
            datasets[0].refs = [
                cast(
                    DatasetRef,
                    butler.registry.findDataset(ref.datasetType, dataId=ref.dataId, collections=ref.run),
                )
                for ref in datasets[0].refs
            ]
            all_refs = []
            for dataset in datasets:
                refs = []
                for ref in dataset.refs:
                    # Create a dict from the dataId to drop the records.
                    new_data_id = {str(k): v for k, v in ref.dataId.items()}
                    new_ref = butler.registry.findDataset(ref.datasetType, new_data_id, collections=ref.run)
                    assert new_ref is not None
                    self.assertFalse(new_ref.dataId.hasRecords())
                    refs.append(new_ref)
                dataset.refs = refs
                all_refs.extend(dataset.refs)
            butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False)

            # Use move mode to test that the file is deleted. Also
            # disable recording of file size.
            butler.ingest(*datasets, transfer="move", record_validation_info=False)

            # Check that every ref now has records.
            for dataset in datasets:
                for ref in dataset.refs:
                    self.assertTrue(ref.dataId.hasRecords())

            # Ensure that the file has disappeared.
            self.assertFalse(tempFile.exists())

            # Check that the datastore recorded no file size.
            # Not all datastores can support this.
            try:
                infos = butler._datastore.getStoredItemsInfo(datasets[0].refs[0])  # type: ignore[attr-defined]
                self.assertEqual(infos[0].file_size, -1)
            except AttributeError:
                pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.exists(datasetTypeName, dataId1))
        self.assertTrue(butler.exists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets
        datasets = []
        butler.ingest(*datasets)
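
    # The core of the ingest contract exercised above, in miniature (a sketch,
    # with a hypothetical pre-existing metrics.yaml file and a resolved ref):
    #
    # dataset = FileDataset(path="metrics.yaml", refs=[ref], formatter=formatter)
    # butler.ingest(dataset, transfer="copy")
    # assert butler.exists(ref)
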

    def testPickle(self) -> None:
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry, entries are not created
        # for its components, but querying can still return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )
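
    # Component dataset type names follow the "parent.component" convention
    # used throughout this file, e.g. (sketch):
    #
    # DatasetType.nameWithComponent("metric", "summary")  # -> "metric.summary"
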

    def testTransaction(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get with a resolved ref
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)
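
    # The rollback pattern this test exercises, reduced to its core
    # (illustrative only; ``contextlib`` would need importing):
    #
    # with contextlib.suppress(TransactionTestError):
    #     with butler.transaction():
    #         butler.put(metric, datasetTypeName, dataId)
    #         raise TransactionTestError("roll everything back")
    # # Afterwards neither registry nor datastore knows about the dataset.
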

    def testMakeRepo(self) -> None:
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler._datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self) -> None:
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool:
        """Check if a file exists at a given path (relative to root).

        The testPutTemplates test verifies the actual physical existence of
        the files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use the default
        # template)
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)
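
    # FileTemplate syntax, as exercised above (a sketch): literal path text
    # plus ``{field}`` placeholders drawn from the data ID and its dimension
    # records ("visit.name" reaches into the visit record); a trailing ``:?``
    # marks a field as optional, so a missing value drops out of the formatted
    # path instead of raising KeyError.
    #
    # FileTemplate("a/{visit.name}/{id}.fits")
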

1312 def testImportExport(self) -> None: 

1313 # Run put/get tests just to create and populate a repo. 

1314 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1315 self.runImportExportTest(storageClass) 

1316 

1317 @unittest.expectedFailure 

1318 def testImportExportVirtualComposite(self) -> None: 

1319 # Run put/get tests just to create and populate a repo. 

1320 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

1321 self.runImportExportTest(storageClass) 

1322 

1323 def runImportExportTest(self, storageClass: StorageClass) -> None: 

1324 """Test exporting and importing. 

1325 

1326 This test does an export to a temp directory and an import back 

1327 into a new temp directory repo. It does not assume a posix datastore. 

1328 """ 

1329 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1330 

1331 # Test that we must have a file extension. 

1332 with self.assertRaises(ValueError): 

1333 with exportButler.export(filename="dump", directory=".") as export: 

1334 pass 

1335 

1336 # Test that unknown format is not allowed. 

1337 with self.assertRaises(ValueError): 

1338 with exportButler.export(filename="dump.fits", directory=".") as export: 

1339 pass 

1340 

1341 # Test that the repo actually has at least one dataset. 

1342 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1343 self.assertGreater(len(datasets), 0) 

1344 # Add a DimensionRecord that's unused by those datasets. 

1345 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")} 

1346 exportButler.registry.insertDimensionData("skymap", skymapRecord) 

1347 # Export and then import datasets. 

1348 with safeTestTempDir(TESTDIR) as exportDir: 

1349 exportFile = os.path.join(exportDir, "exports.yaml") 

1350 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export: 

1351 export.saveDatasets(datasets) 

1352 # Export the same datasets again. This should quietly do 

1353 # nothing because of internal deduplication, and it shouldn't 

1354 # complain about being asked to export the "htm7" elements even 

1355 # though there aren't any in these datasets or in the database. 

1356 export.saveDatasets(datasets, elements=["htm7"]) 

1357 # Save one of the data IDs again; this should be harmless 

1358 # because of internal deduplication. 

1359 export.saveDataIds([datasets[0].dataId]) 

1360 # Save some dimension records directly. 

1361 export.saveDimensionData("skymap", [skymapRecord]) 

1362 self.assertTrue(os.path.exists(exportFile)) 

1363 with safeTestTempDir(TESTDIR) as importDir: 

1364 # We always want this to be a local POSIX butler.

1365 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml"))) 

1366 # Calling script.butlerImport tests the implementation of the 

1367 # butler command line interface "import" subcommand. Functions 

1368 # in the script folder are generally considered protected and 

1369 # should not be used as public API.

1370 with open(exportFile) as f: 

1371 script.butlerImport( 

1372 importDir, 

1373 export_file=f, 

1374 directory=exportDir, 

1375 transfer="auto", 

1376 skip_dimensions=None, 

1377 ) 

1378 importButler = Butler(importDir, run=self.default_run) 

1379 for ref in datasets: 

1380 with self.subTest(ref=ref): 

1381 # Test for existence by passing in the DatasetType and 

1382 # data ID separately, to avoid lookup by dataset_id. 

1383 self.assertTrue(importButler.exists(ref.datasetType, ref.dataId)) 

1384 self.assertEqual( 

1385 list(importButler.registry.queryDimensionRecords("skymap")), 

1386 [importButler.dimensions["skymap"].RecordClass(**skymapRecord)], 

1387 ) 

1388 
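# Condensed sketch of the export/import round trip exercised above,
# assuming an existing writeable butler and a list of DatasetRef objects.
# The paths and the helper name are placeholders, not fixtures.
def _sketchExportImportRoundTrip(self, butler: Butler, datasets: list[DatasetRef]) -> None:
    with butler.export(filename="exports.yaml", directory="export_dir", transfer="auto") as export:
        export.saveDatasets(datasets)  # records both file artifacts and registry entries
    # script.butlerImport backs the "butler import" subcommand; it is
    # protected API, used here exactly as in runImportExportTest above.
    with open("exports.yaml") as f:
        script.butlerImport(
            "import_repo",
            export_file=f,
            directory="export_dir",
            transfer="auto",
            skip_dimensions=None,
        )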

1389 def testRemoveRuns(self) -> None: 

1390 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1391 butler = Butler(self.tmpConfigFile, writeable=True) 

1392 # Load registry data with dimensions to hang datasets off of. 

1393 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1394 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1395 # Add some RUN-type collections.

1396 run1 = "run1" 

1397 butler.registry.registerRun(run1) 

1398 run2 = "run2" 

1399 butler.registry.registerRun(run2) 

1400 # Put a dataset in each run.

1401 metric = makeExampleMetrics() 

1402 dimensions = butler.dimensions.extract(["instrument", "physical_filter"]) 

1403 datasetType = self.addDatasetType( 

1404 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1405 ) 

1406 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1407 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1408 uri1 = butler.getURI(ref1) 

1409 uri2 = butler.getURI(ref2) 

1410 

1411 with self.assertRaises(OrphanedRecordError): 

1412 butler.registry.removeDatasetType(datasetType.name) 

1413 

1414 # Remove from both runs with different values for unstore. 

1415 butler.removeRuns([run1], unstore=True) 

1416 butler.removeRuns([run2], unstore=False) 

1417 # Should be nothing in registry for either one, and datastore should 

1418 # not think either exists. 

1419 with self.assertRaises(MissingCollectionError): 

1420 butler.registry.getCollectionType(run1) 

1421 with self.assertRaises(MissingCollectionError): 

1422 butler.registry.getCollectionType(run2) 

1423 self.assertFalse(butler.stored(ref1)) 

1424 self.assertFalse(butler.stored(ref2)) 

1425 # The ref we unstored should be gone according to the URI, but the 

1426 # one we forgot should still be around. 

1427 self.assertFalse(uri1.exists()) 

1428 self.assertTrue(uri2.exists()) 

1429 

1430 # Now that the collections have been pruned we can remove the 

1431 # dataset type 

1432 butler.registry.removeDatasetType(datasetType.name) 

1433 

1434 with self.assertLogs("lsst.daf.butler.registries", "INFO") as cm: 

1435 butler.registry.removeDatasetType(("test*", "test*")) 

1436 self.assertIn("not defined", "\n".join(cm.output)) 

1437 
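# A short sketch of the removeRuns() semantics checked above, assuming a
# writeable butler with RUN collections "run1" and "run2". Illustrative
# only; the function name is hypothetical.
def _sketch_remove_runs(butler: Butler) -> None:
    butler.removeRuns(["run1"], unstore=True)  # registry entries and file artifacts both removed
    butler.removeRuns(["run2"], unstore=False)  # registry forgets, but artifacts stay on disk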

1438 

1439class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1440 """PosixDatastore specialization of a butler""" 

1441 

1442 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1443 fullConfigKey: str | None = ".datastore.formatters" 

1444 validationCanFail = True 

1445 datastoreStr = ["/tmp"] 

1446 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1447 registryStr = "/gen3.sqlite3" 

1448 

1449 def testPathConstructor(self) -> None: 

1450 """Independent test of constructor using PathLike.""" 

1451 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1452 self.assertIsInstance(butler, Butler) 

1453 

1454 # And again with a Path object with the butler yaml 

1455 path = pathlib.Path(self.tmpConfigFile) 

1456 butler = Butler(path, writeable=False) 

1457 self.assertIsInstance(butler, Butler) 

1458 

1459 # And again with a Path object without the butler yaml 

1460 # (making sure we skip it if the tmp config doesn't end 

1461 # in butler.yaml -- which is the case for a subclass) 

1462 if self.tmpConfigFile.endswith("butler.yaml"): 

1463 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1464 butler = Butler(path, writeable=False) 

1465 self.assertIsInstance(butler, Butler) 

1466 

1467 def testExportTransferCopy(self) -> None: 

1468 """Test local export using all transfer modes""" 

1469 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1470 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1471 # Test that the repo actually has at least one dataset. 

1472 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1473 self.assertGreater(len(datasets), 0) 

1474 uris = [exportButler.getURI(d) for d in datasets] 

1475 assert isinstance(exportButler._datastore, FileDatastore) 

1476 datastoreRoot = exportButler.get_datastore_roots()[exportButler.get_datastore_names()[0]] 

1477 

1478 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1479 

1480 for path in pathsInStore: 

1481 # Assume local file system 

1482 assert path is not None 

1483 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1484 

1485 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1486 with safeTestTempDir(TESTDIR) as exportDir: 

1487 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1488 export.saveDatasets(datasets) 

1489 for path in pathsInStore: 

1490 assert path is not None 

1491 self.assertTrue( 

1492 self.checkFileExists(exportDir, path), 

1493 f"Check that mode {transfer} exported files", 

1494 ) 

1495 
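# The transfer modes looped over below can be used directly with
# Butler.export; a condensed sketch (illustrative only, hypothetical name):
def _sketchExportWithTransfer(self, butler: Butler, refs: list[DatasetRef], mode: str) -> None:
    # mode is one of "copy", "link", "symlink", "relsymlink" (or "auto").
    with butler.export(directory="export_dir", format="yaml", transfer=mode) as export:
        export.saveDatasets(refs)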

1496 def testPruneDatasets(self) -> None: 

1497 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1498 butler = Butler(self.tmpConfigFile, writeable=True) 

1499 assert isinstance(butler._datastore, FileDatastore) 

1500 # Load registry data with dimensions to hang datasets off of. 

1501 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1502 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1503 # Add some RUN-type collections. 

1504 run1 = "run1" 

1505 butler.registry.registerRun(run1) 

1506 run2 = "run2" 

1507 butler.registry.registerRun(run2) 

1508 # Put some datasets. ref1 and ref2 have the same data ID and are in

1509 # different runs; ref3 has a different data ID.

1510 metric = makeExampleMetrics() 

1511 dimensions = butler.dimensions.extract(["instrument", "physical_filter"]) 

1512 datasetType = self.addDatasetType( 

1513 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1514 ) 

1515 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1516 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1517 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1518 

1519 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1520 for ref, stored in many_stored.items(): 

1521 self.assertTrue(stored, f"Ref {ref} should be stored") 

1522 

1523 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1524 for ref, exists in many_exists.items(): 

1525 self.assertTrue(exists, f"Checking ref {ref} exists.") 

1526 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored") 

1527 

1528 # Simple prune. 

1529 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1530 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1)) 

1531 

1532 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1533 for ref, stored in many_stored.items(): 

1534 self.assertFalse(stored, f"Ref {ref} should not be stored") 

1535 

1536 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1537 for ref, exists in many_exists.items(): 

1538 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored") 

1539 

1540 # Put data back. 

1541 ref1_new = butler.put(metric, ref1) 

1542 self.assertEqual(ref1_new, ref1) # Reuses original ID. 

1543 ref2 = butler.put(metric, ref2) 

1544 

1545 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1546 self.assertTrue(many_stored[ref1]) 

1547 self.assertTrue(many_stored[ref2]) 

1548 self.assertFalse(many_stored[ref3]) 

1549 

1550 ref3 = butler.put(metric, ref3) 

1551 

1552 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1553 for ref, exists in many_exists.items(): 

1554 self.assertTrue(exists, f"Ref {ref} should be stored")

1555 

1556 # Clear out the datasets from registry and start again. 

1557 refs = [ref1, ref2, ref3] 

1558 butler.pruneDatasets(refs, purge=True, unstore=True) 

1559 for ref in refs: 

1560 butler.put(metric, ref) 

1561 

1562 # Test different forms of file availability. 

1563 # Need to be in a state where: 

1564 # - one ref just has registry record. 

1565 # - one ref has a missing file but a datastore record. 

1566 # - one ref has a missing datastore record but file is there. 

1567 # - one ref does not exist anywhere. 

1568 # Do not need to test a ref that has everything since that is tested 

1569 # above. 

1570 ref0 = DatasetRef( 

1571 datasetType, 

1572 DataCoordinate.standardize( 

1573 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions 

1574 ), 

1575 run=run1, 

1576 ) 

1577 

1578 # Delete from datastore and retain in Registry. 

1579 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False) 

1580 

1581 # File has been removed. 

1582 uri2 = butler.getURI(ref2) 

1583 uri2.remove() 

1584 

1585 # Datastore has lost track. 

1586 butler._datastore.forget([ref3]) 

1587 

1588 # First test with a standard butler. 

1589 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1590 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1591 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1592 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1593 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED) 

1594 

1595 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False) 

1596 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1597 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1598 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN) 

1599 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1600 self.assertTrue(exists_many[ref2]) 

1601 

1602 # Check that per-ref query gives the same answer as many query. 

1603 for ref, exists in exists_many.items(): 

1604 self.assertEqual(butler.exists(ref, full_check=False), exists) 

1605 

1606 # Test again with a trusting butler. 

1607 butler._datastore.trustGetRequest = True 

1608 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1609 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1610 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1611 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1612 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT) 

1613 

1614 # Check that per-ref query gives the same answer as many query. 

1615 for ref, exists in exists_many.items(): 

1616 self.assertEqual(butler.exists(ref, full_check=True), exists) 

1617 

1618 # Create a ref that surprisingly has the UUID of an existing ref 

1619 # but is not the same. 

1620 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id) 

1621 with self.assertRaises(ValueError): 

1622 butler.exists(ref_bad) 

1623 

1624 # Create a ref that has a compatible storage class. 

1625 ref_compat = ref2.overrideStorageClass("StructuredDataDict") 

1626 exists = butler.exists(ref_compat) 

1627 self.assertEqual(exists, exists_many[ref2]) 

1628 

1629 # Remove everything and start from scratch. 

1630 butler._datastore.trustGetRequest = False 

1631 butler.pruneDatasets(refs, purge=True, unstore=True) 

1632 for ref in refs: 

1633 butler.put(metric, ref) 

1634 

1635 # These tests mess directly with the trash table and can leave the 

1636 # datastore in an odd state. Do them at the end. 

1637 # Check that in normal mode, deleting the record will lead to 

1638 # trash not touching the file. 

1639 uri1 = butler.getURI(ref1) 

1640 butler._datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1641 butler._datastore.forget([ref1]) 

1642 butler._datastore.trash(ref1) 

1643 butler._datastore.emptyTrash() 

1644 self.assertTrue(uri1.exists()) 

1645 uri1.remove() # Clean it up. 

1646 

1647 # Simulate execution butler setup by deleting the datastore 

1648 # record but keeping the file around and trusting. 

1649 butler._datastore.trustGetRequest = True 

1650 uris = butler.get_many_uris([ref2, ref3]) 

1651 uri2 = uris[ref2].primaryURI 

1652 uri3 = uris[ref3].primaryURI 

1653 self.assertTrue(uri2.exists()) 

1654 self.assertTrue(uri3.exists()) 

1655 

1656 # Remove the datastore record. 

1657 butler._datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1658 butler._datastore.forget([ref2]) 

1659 self.assertTrue(uri2.exists()) 

1660 butler._datastore.trash([ref2, ref3]) 

1661 # Immediate removal for ref2 file 

1662 self.assertFalse(uri2.exists()) 

1663 # But ref3 has to wait for the empty. 

1664 self.assertTrue(uri3.exists()) 

1665 butler._datastore.emptyTrash() 

1666 self.assertFalse(uri3.exists()) 

1667 

1668 # Clear out the datasets from registry. 

1669 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1670 
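# DatasetExistence is a flag enum, so the states asserted above compose
# bitwise. A minimal reading sketch (illustrative only, hypothetical name):
def _sketchExistenceFlags(self, butler: Butler, ref: DatasetRef) -> None:
    exists = butler.exists(ref, full_check=True)
    if exists == DatasetExistence.RECORDED | DatasetExistence.DATASTORE:
        # Registry and datastore both know the ref, but the artifact
        # itself failed the full check (e.g. the file was deleted).
        pass
    elif exists == DatasetExistence.VERIFIED:
        # Registry record, datastore record, and artifact all present.
        pass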

1671 def testPytypeCoercion(self) -> None: 

1672 """Test python type coercion on Butler.get and put.""" 

1673 # Store some data with the normal example storage class. 

1674 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1675 datasetTypeName = "test_metric" 

1676 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1677 

1678 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1679 metric = butler.get(datasetTypeName, dataId=dataId) 

1680 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1681 

1682 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1683 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1684 

1685 # Now need to hack the registry dataset type definition. 

1686 # There is no API for this. 

1687 assert isinstance(butler._registry, SqlRegistry) 

1688 manager = butler._registry._managers.datasets 

1689 assert hasattr(manager, "_db") and hasattr(manager, "_static") 

1690 manager._db.update( 

1691 manager._static.dataset_type, 

1692 {"name": datasetTypeName}, 

1693 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1694 ) 

1695 

1696 # Force reset of dataset type cache 

1697 butler.registry.refresh() 

1698 

1699 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1700 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1701 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1702 

1703 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1704 self.assertNotEqual(type(metric_model), type(metric)) 

1705 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1706 

1707 # Put the model and read it back to show that everything now 

1708 # works as normal. 

1709 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1710 metric_model_new = butler.get(metric_ref) 

1711 self.assertEqual(metric_model_new, metric_model) 

1712 

1713 # Hack the storage class again to something that will fail on the 

1714 # get with no conversion class. 

1715 manager._db.update( 

1716 manager._static.dataset_type, 

1717 {"name": datasetTypeName}, 

1718 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1719 ) 

1720 butler.registry.refresh() 

1721 

1722 with self.assertRaises(ValueError): 

1723 butler.get(datasetTypeName, dataId=dataId) 

1724 
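# A user-level alternative to the registry table edits performed above:
# request a compatible storage class at get time, as testPruneDatasets does
# with DatasetRef.overrideStorageClass. Sketch only; hypothetical name.
def _sketch_storage_class_override(butler: Butler, ref: DatasetRef) -> Any:
    return butler.get(ref.overrideStorageClass("StructuredDataDict"))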

1725 

1726@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1727class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1728 """PosixDatastore specialization of a butler using Postgres""" 

1729 

1730 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1731 fullConfigKey = ".datastore.formatters" 

1732 validationCanFail = True 

1733 datastoreStr = ["/tmp"] 

1734 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1735 registryStr = "PostgreSQL@test" 

1736 postgresql: Any 

1737 

1738 @staticmethod 

1739 def _handler(postgresql: Any) -> None: 

1740 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1741 with engine.begin() as connection: 

1742 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1743 

1744 @classmethod 

1745 def setUpClass(cls) -> None: 

1746 # Create the postgres test server. 

1747 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1748 cache_initialized_db=True, on_initialized=cls._handler 

1749 ) 

1750 super().setUpClass() 

1751 

1752 @classmethod 

1753 def tearDownClass(cls) -> None: 

1754 # Clean up any lingering SQLAlchemy engines/connections 

1755 # so they're closed before we shut down the server. 

1756 gc.collect() 

1757 cls.postgresql.clear_cache() 

1758 super().tearDownClass() 

1759 
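# The per-test server lifecycle used here, condensed (illustrative only,
# hypothetical name): the class-level factory caches an initialized
# database, each test requests a fresh server, and tearDown stops it.
def _sketchPostgresLifecycle(self) -> None:
    factory = testing.postgresql.PostgresqlFactory(cache_initialized_db=True)
    server = factory()  # start a fresh server for one test
    db_url = server.url()  # SQLAlchemy-compatible connection URL for the registry
    server.stop()  # shut the server down (tearDown)
    factory.clear_cache()  # release the cached template DB (tearDownClass)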

1760 def setUp(self) -> None: 

1761 self.server = self.postgresql() 

1762 

1763 # Need to add a registry section to the config. 

1764 self._temp_config = False 

1765 config = Config(self.configFile) 

1766 config["registry", "db"] = self.server.url() 

1767 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1768 config.dump(fh) 

1769 self.configFile = fh.name 

1770 self._temp_config = True 

1771 super().setUp() 

1772 

1773 def tearDown(self) -> None: 

1774 self.server.stop() 

1775 if self._temp_config and os.path.exists(self.configFile): 

1776 os.remove(self.configFile) 

1777 super().tearDown() 

1778 

1779 def testMakeRepo(self) -> None: 

1780 # The base class test assumes that it's using sqlite and assumes 

1781 # the config file is acceptable to sqlite. 

1782 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1783 

1784 

1785class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1786 """InMemoryDatastore specialization of a butler""" 

1787 

1788 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1789 fullConfigKey = None 

1790 useTempRoot = False 

1791 validationCanFail = False 

1792 datastoreStr = ["datastore='InMemory"] 

1793 datastoreName = ["InMemoryDatastore@"] 

1794 registryStr = "/gen3.sqlite3" 

1795 

1796 def testIngest(self) -> None: 

1797 """Disabled: ingest of external files does not apply to an in-memory datastore."""

1798 

1799 

1800class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1801 """PosixDatastore specialization""" 

1802 

1803 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1804 fullConfigKey = ".datastore.datastores.1.formatters" 

1805 validationCanFail = True 

1806 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1807 datastoreName = [ 

1808 "InMemoryDatastore@", 

1809 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1810 "SecondDatastore", 

1811 ] 

1812 registryStr = "/gen3.sqlite3" 

1813 

1814 

1815class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1816 """Test that a yaml file in one location can refer to a root in another.""" 

1817 

1818 datastoreStr = ["dir1"] 

1819 # Disable the makeRepo test since we are deliberately not using 

1820 # butler.yaml as the config name. 

1821 fullConfigKey = None 

1822 

1823 def setUp(self) -> None: 

1824 self.root = makeTestTempDir(TESTDIR) 

1825 

1826 # Make a new repository in one place 

1827 self.dir1 = os.path.join(self.root, "dir1") 

1828 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1829 

1830 # Move the yaml file to a different place and add a "root" 

1831 self.dir2 = os.path.join(self.root, "dir2") 

1832 os.makedirs(self.dir2, exist_ok=True) 

1833 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1834 config = Config(configFile1) 

1835 config["root"] = self.dir1 

1836 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1837 config.dumpToUri(configFile2) 

1838 os.remove(configFile1) 

1839 self.tmpConfigFile = configFile2 

1840 

1841 def testFileLocations(self) -> None: 

1842 self.assertNotEqual(self.dir1, self.dir2) 

1843 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1844 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1845 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1846 

1847 

1848class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1849 """Test that a config file created by makeRepo outside of repo works.""" 

1850 

1851 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1852 

1853 def setUp(self) -> None: 

1854 self.root = makeTestTempDir(TESTDIR) 

1855 self.root2 = makeTestTempDir(TESTDIR) 

1856 

1857 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1858 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1859 

1860 def tearDown(self) -> None: 

1861 if os.path.exists(self.root2): 

1862 shutil.rmtree(self.root2, ignore_errors=True) 

1863 super().tearDown() 

1864 

1865 def testConfigExistence(self) -> None: 

1866 c = Config(self.tmpConfigFile) 

1867 uri_config = ResourcePath(c["root"]) 

1868 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1869 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1870 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1871 

1872 def testPutGet(self) -> None: 

1873 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1874 self.runPutGetTest(storageClass, "test_metric") 

1875 

1876 

1877class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1878 """Test that a config file created by makeRepo outside of repo works.""" 

1879 

1880 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1881 

1882 def setUp(self) -> None: 

1883 self.root = makeTestTempDir(TESTDIR) 

1884 self.root2 = makeTestTempDir(TESTDIR) 

1885 

1886 self.tmpConfigFile = self.root2 

1887 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1888 

1889 def testConfigExistence(self) -> None: 

1890 # Append the yaml file else Config constructor does not know the file 

1891 # type. 

1892 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1893 super().testConfigExistence() 

1894 

1895 

1896class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1897 """Test that a config file created by makeRepo outside of repo works.""" 

1898 

1899 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1900 

1901 def setUp(self) -> None: 

1902 self.root = makeTestTempDir(TESTDIR) 

1903 self.root2 = makeTestTempDir(TESTDIR) 

1904 

1905 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1906 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1907 

1908 

1909@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1910class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1911 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1912 a local in-memory SqlRegistry. 

1913 """ 

1914 

1915 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1916 fullConfigKey = None 

1917 validationCanFail = True 

1918 

1919 bucketName = "anybucketname" 

1920 """Name of the Bucket that will be used in the tests. The name is read from 

1921 the config file used with the tests during set-up. 

1922 """ 

1923 

1924 root = "butlerRoot/" 

1925 """Root repository directory expected to be used in case useTempRoot=False. 

1926 Otherwise the root is set to a 20 characters long randomly generated string 

1927 during set-up. 

1928 """ 

1929 

1930 datastoreStr = [f"datastore={root}"] 

1931 """Contains all expected root locations in a format expected to be 

1932 returned by Butler stringification. 

1933 """ 

1934 

1935 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1936 """The expected format of the S3 Datastore string.""" 

1937 

1938 registryStr = "/gen3.sqlite3" 

1939 """Expected format of the Registry string.""" 

1940 

1941 mock_s3 = mock_s3() 

1942 """The mocked s3 interface from moto.""" 

1943 

1944 def genRoot(self) -> str: 

1945 """Return a random string of len 20 to serve as a root 

1946 name for the temporary bucket repo. 

1947 

1948 This is equivalent to tempfile.mkdtemp as this is what self.root 

1949 becomes when useTempRoot is True. 

1950 """ 

1951 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1952 return rndstr + "/" 

1953 

1954 def setUp(self) -> None: 

1955 config = Config(self.configFile) 

1956 uri = ResourcePath(config[".datastore.datastore.root"]) 

1957 self.bucketName = uri.netloc 

1958 

1959 # Enable S3 mocking of tests. 

1960 self.mock_s3.start() 

1961 

1962 # set up some fake credentials if they do not exist 

1963 self.usingDummyCredentials = setAwsEnvCredentials() 

1964 

1965 if self.useTempRoot: 

1966 self.root = self.genRoot() 

1967 rooturi = f"s3://{self.bucketName}/{self.root}" 

1968 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1969 

1970 # need local folder to store registry database 

1971 self.reg_dir = makeTestTempDir(TESTDIR) 

1972 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1973 

1974 # moto needs to know that we expect the bucket to exist

1975 # (its name used to be the class attribute bucketName).

1976 s3 = boto3.resource("s3") 

1977 s3.create_bucket(Bucket=self.bucketName) 

1978 

1979 self.datastoreStr = [f"datastore='{rooturi}'"] 

1980 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1981 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1982 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1983 

1984 def tearDown(self) -> None: 

1985 s3 = boto3.resource("s3") 

1986 bucket = s3.Bucket(self.bucketName) 

1987 try: 

1988 bucket.objects.all().delete() 

1989 except botocore.exceptions.ClientError as e: 

1990 if e.response["Error"]["Code"] == "404": 

1991 # the key was not reachable - pass 

1992 pass 

1993 else: 

1994 raise 

1995 

1996 bucket = s3.Bucket(self.bucketName) 

1997 bucket.delete() 

1998 

1999 # Stop the S3 mock. 

2000 self.mock_s3.stop() 

2001 

2002 # unset any potentially set dummy credentials 

2003 if self.usingDummyCredentials: 

2004 unsetAwsEnvCredentials() 

2005 

2006 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

2007 shutil.rmtree(self.reg_dir, ignore_errors=True) 

2008 

2009 if self.useTempRoot and os.path.exists(self.root): 

2010 shutil.rmtree(self.root, ignore_errors=True) 

2011 

2012 super().tearDown() 

2013 
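# Condensed sketch of the moto setup used by the S3 test case above: start
# the mock, create the bucket the config expects, then point a repo at an
# s3:// root. Names are placeholders; fake AWS credentials may also be
# needed (see setAwsEnvCredentials in setUp).
def _sketch_mocked_s3_repo() -> None:
    mock = mock_s3()
    mock.start()
    boto3.resource("s3").create_bucket(Bucket="anybucketname")
    Butler.makeRepo("s3://anybucketname/butlerRoot/")
    mock.stop()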

2014 

2015class PosixDatastoreTransfers(unittest.TestCase): 

2016 """Test data transfers between butlers. 

2017 

2018 Test for different managers. UUID to UUID and integer to integer are 

2019 tested. UUID to integer is not supported since we do not currently 

2020 want to allow that. Integer to UUID is supported with the caveat 

2021 that UUID4 will be generated and this will be incorrect for raw 

2022 dataset types. The test ignores that. 

2023 """ 

2024 

2025 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2026 storageClassFactory: StorageClassFactory 

2027 

2028 @classmethod 

2029 def setUpClass(cls) -> None: 

2030 cls.storageClassFactory = StorageClassFactory() 

2031 cls.storageClassFactory.addFromConfig(cls.configFile) 

2032 

2033 def setUp(self) -> None: 

2034 self.root = makeTestTempDir(TESTDIR) 

2035 self.config = Config(self.configFile) 

2036 

2037 def tearDown(self) -> None: 

2038 removeTestTempDir(self.root) 

2039 

2040 def create_butler(self, manager: str, label: str) -> Butler: 

2041 config = Config(self.configFile) 

2042 config["registry", "managers", "datasets"] = manager 

2043 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

2044 

2045 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None: 

2046 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID" 

2047 if manager1 is None: 

2048 manager1 = default 

2049 if manager2 is None: 

2050 manager2 = default 

2051 self.source_butler = self.create_butler(manager1, "1") 

2052 self.target_butler = self.create_butler(manager2, "2") 

2053 
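# The core operation under test, condensed (illustrative only,
# hypothetical name): copy refs from one butler to another, creating any
# missing dataset types and dimension records in the target.
def _sketch_transfer(self, refs: list[DatasetRef]) -> None:
    self.target_butler.transfer_from(
        self.source_butler,
        refs,
        register_dataset_types=True,
        transfer_dimensions=True,
    )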

2054 def testTransferUuidToUuid(self) -> None: 

2055 self.create_butlers() 

2056 self.assertButlerTransfers() 

2057 

2058 def _enable_trust(self, datastore: Datastore) -> None: 

2059 if hasattr(datastore, "trustGetRequest"): 

2060 datastore.trustGetRequest = True 

2061 elif hasattr(datastore, "datastores"): 

2062 for this_datastore in datastore.datastores: 

2063 if hasattr(this_datastore, "trustGetRequest"): 

2064 this_datastore.trustGetRequest = True 

2065 

2066 def testTransferMissing(self) -> None: 

2067 """Test transfers where datastore records are missing. 

2068 

2069 This is how execution butler works. 

2070 """ 

2071 self.create_butlers() 

2072 

2073 # Configure the source butler to allow trust. 

2074 self._enable_trust(self.source_butler._datastore) 

2075 

2076 self.assertButlerTransfers(purge=True) 

2077 

2078 def testTransferMissingDisassembly(self) -> None: 

2079 """Test transfers where datastore records are missing. 

2080 

2081 This is how execution butler works. 

2082 """ 

2083 self.create_butlers() 

2084 

2085 # Configure the source butler to allow trust. 

2086 self._enable_trust(self.source_butler._datastore) 

2087 

2088 # Test disassembly. 

2089 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2090 

2091 def testAbsoluteURITransferDirect(self) -> None: 

2092 """Test transfer using an absolute URI.""" 

2093 self._absolute_transfer("auto") 

2094 

2095 def testAbsoluteURITransferCopy(self) -> None: 

2096 """Test transfer using an absolute URI.""" 

2097 self._absolute_transfer("copy") 

2098 

2099 def _absolute_transfer(self, transfer: str) -> None: 

2100 self.create_butlers() 

2101 

2102 storageClassName = "StructuredData" 

2103 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2104 datasetTypeName = "random_data" 

2105 run = "run1" 

2106 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2107 

2108 dimensions = self.source_butler.dimensions.extract(()) 

2109 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2110 self.source_butler.registry.registerDatasetType(datasetType) 

2111 

2112 metrics = makeExampleMetrics() 

2113 with ResourcePath.temporary_uri(suffix=".json") as temp: 

2114 dataId = DataCoordinate.makeEmpty(self.source_butler.dimensions) 

2115 source_refs = [DatasetRef(datasetType, dataId, run=run)] 

2116 temp.write(json.dumps(metrics.exportAsDict()).encode()) 

2117 dataset = FileDataset(path=temp, refs=source_refs) 

2118 self.source_butler.ingest(dataset, transfer="direct") 

2119 

2120 self.target_butler.transfer_from( 

2121 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer 

2122 ) 

2123 

2124 uri = self.target_butler.getURI(dataset.refs[0]) 

2125 if transfer == "auto": 

2126 self.assertEqual(uri, temp) 

2127 else: 

2128 self.assertNotEqual(uri, temp) 

2129 

2130 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None: 

2131 """Test that a run can be transferred to another butler.""" 

2132 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2133 datasetTypeName = "random_data" 

2134 

2135 # Test will create 3 collections and we will want to transfer 

2136 # two of those three. 

2137 runs = ["run1", "run2", "other"] 

2138 

2139 # Also want to use two different dataset types to ensure that 

2140 # grouping works. 

2141 datasetTypeNames = ["random_data", "random_data_2"] 

2142 

2143 # Create the run collections in the source butler. 

2144 for run in runs: 

2145 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2146 

2147 # Create dimensions in source butler. 

2148 n_exposures = 30 

2149 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2150 self.source_butler.registry.insertDimensionData( 

2151 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2152 ) 

2153 self.source_butler.registry.insertDimensionData( 

2154 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2155 ) 

2156 

2157 for i in range(n_exposures): 

2158 self.source_butler.registry.insertDimensionData( 

2159 "exposure", 

2160 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2161 ) 

2162 

2163 # Create dataset types in the source butler. 

2164 dimensions = self.source_butler.dimensions.extract(["instrument", "exposure"]) 

2165 for datasetTypeName in datasetTypeNames: 

2166 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2167 self.source_butler.registry.registerDatasetType(datasetType) 

2168 

2169 # Write a dataset to an unrelated run -- this will ensure that 

2170 # we are rewriting integer dataset ids in the target if necessary. 

2171 # Will not be relevant for UUID. 

2172 run = "distraction" 

2173 butler = Butler(butler=self.source_butler, run=run) 

2174 butler.put( 

2175 makeExampleMetrics(), 

2176 datasetTypeName, 

2177 exposure=1, 

2178 instrument="DummyCamComp", 

2179 physical_filter="d-r", 

2180 ) 

2181 

2182 # Write some example metrics to the source 

2183 butler = Butler(butler=self.source_butler) 

2184 

2185 # Set of DatasetRefs that should be in the list of refs to transfer 

2186 # but which will not be transferred. 

2187 deleted: set[DatasetRef] = set() 

2188 

2189 n_expected = 20 # Number of datasets expected to be transferred 

2190 source_refs = [] 

2191 for i in range(n_exposures): 

2192 # Put a third of datasets into each collection, only retain 

2193 # two thirds. 

2194 index = i % 3 

2195 run = runs[index] 

2196 datasetTypeName = datasetTypeNames[i % 2] 

2197 

2198 metric = MetricsExample( 

2199 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)] 

2200 ) 

2201 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2202 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2203 

2204 # Remove the datastore record using low-level API, but only 

2205 # for a specific index. 

2206 if purge and index == 1: 

2207 # For one of these delete the file as well. 

2208 # This allows the "missing" code to filter the 

2209 # file out. 

2210 # Access the individual datastores. 

2211 datastores = [] 

2212 if hasattr(butler._datastore, "datastores"): 

2213 datastores.extend(butler._datastore.datastores) 

2214 else: 

2215 datastores.append(butler._datastore) 

2216 

2217 if not deleted: 

2218 # For a chained datastore we need to remove 

2219 # files in each chain. 

2220 for datastore in datastores: 

2221 # The file might not be known to the datastore 

2222 # if constraints are used. 

2223 try: 

2224 primary, uris = datastore.getURIs(ref) 

2225 except FileNotFoundError: 

2226 continue 

2227 if primary and primary.scheme != "mem": 

2228 primary.remove() 

2229 for uri in uris.values(): 

2230 if uri.scheme != "mem": 

2231 uri.remove() 

2232 n_expected -= 1 

2233 deleted.add(ref) 

2234 

2235 # Remove the datastore record. 

2236 for datastore in datastores: 

2237 if hasattr(datastore, "removeStoredItemInfo"): 

2238 datastore.removeStoredItemInfo(ref) 

2239 

2240 if index < 2: 

2241 source_refs.append(ref) 

2242 if ref not in deleted: 

2243 new_metric = butler.get(ref) 

2244 self.assertEqual(new_metric, metric) 

2245 

2246 # Create some bad dataset types to ensure we check for inconsistent 

2247 # definitions. 

2248 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2249 for datasetTypeName in datasetTypeNames: 

2250 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2251 self.target_butler.registry.registerDatasetType(datasetType) 

2252 with self.assertRaises(ConflictingDefinitionError) as cm: 

2253 self.target_butler.transfer_from(self.source_butler, source_refs) 

2254 self.assertIn("dataset type differs", str(cm.exception)) 

2255 

2256 # And remove the bad definitions. 

2257 for datasetTypeName in datasetTypeNames: 

2258 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2259 

2260 # Transfer without creating dataset types should fail. 

2261 with self.assertRaises(KeyError): 

2262 self.target_butler.transfer_from(self.source_butler, source_refs) 

2263 

2264 # Transfer without creating dimensions should fail. 

2265 with self.assertRaises(ConflictingDefinitionError) as cm: 

2266 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True) 

2267 self.assertIn("dimension", str(cm.exception)) 

2268 

2269 # The failed transfer above leaves registry in an inconsistent 

2270 # state because the run is created but then rolled back without 

2271 # the collection cache being cleared. For now force a refresh. 

2272 # Can remove with DM-35498. 

2273 self.target_butler.registry.refresh() 

2274 

2275 # Now transfer them to the second butler, including dimensions. 

2276 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2277 transferred = self.target_butler.transfer_from( 

2278 self.source_butler, 

2279 source_refs, 

2280 register_dataset_types=True, 

2281 transfer_dimensions=True, 

2282 ) 

2283 self.assertEqual(len(transferred), n_expected) 

2284 log_output = ";".join(log_cm.output) 

2285 

2286 # A ChainedDatastore will use the in-memory datastore for mexists 

2287 # so we cannot rely on the mexists log message.

2288 self.assertIn("Number of datastore records found in source", log_output) 

2289 self.assertIn("Creating output run", log_output) 

2290 

2291 # Do the transfer twice to ensure that it will do nothing extra. 

2292 # Only do this if purge=True because it does not work for int 

2293 # dataset_id. 

2294 if purge: 

2295 # This should not need to register dataset types. 

2296 transferred = self.target_butler.transfer_from(self.source_butler, source_refs) 

2297 self.assertEqual(len(transferred), n_expected) 

2298 

2299 # Also do an explicit low-level transfer to trigger some 

2300 # edge cases. 

2301 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2302 self.target_butler._datastore.transfer_from(self.source_butler._datastore, source_refs) 

2303 log_output = ";".join(log_cm.output) 

2304 self.assertIn("no file artifacts exist", log_output) 

2305 

2306 with self.assertRaises((TypeError, AttributeError)): 

2307 self.target_butler._datastore.transfer_from(self.source_butler, source_refs) # type: ignore 

2308 

2309 with self.assertRaises(ValueError): 

2310 self.target_butler._datastore.transfer_from( 

2311 self.source_butler._datastore, source_refs, transfer="split" 

2312 ) 

2313 

2314 # Now try to get the same refs from the new butler. 

2315 for ref in source_refs: 

2316 if ref not in deleted: 

2317 new_metric = self.target_butler.get(ref) 

2318 old_metric = self.source_butler.get(ref) 

2319 self.assertEqual(new_metric, old_metric) 

2320 

2321 # Now prune run2 collection and create instead a CHAINED collection. 

2322 # This should block the transfer. 

2323 self.target_butler.removeRuns(["run2"], unstore=True) 

2324 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2325 with self.assertRaises(CollectionTypeError): 

2326 # Re-importing the run1 datasets can be problematic if they 

2327 # use integer IDs so filter those out. 

2328 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2329 self.target_butler.transfer_from(self.source_butler, to_transfer) 

2330 

2331 

2332class ChainedDatastoreTransfers(PosixDatastoreTransfers): 

2333 """Test transfers using a chained datastore.""" 

2334 

2335 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2336 

2337 

2338class NullDatastoreTestCase(unittest.TestCase): 

2339 """Test that we can fall back to a null datastore.""" 

2340 

2341 # Need a good config to create the repo. 

2342 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2343 storageClassFactory: StorageClassFactory 

2344 

2345 @classmethod 

2346 def setUpClass(cls) -> None: 

2347 cls.storageClassFactory = StorageClassFactory() 

2348 cls.storageClassFactory.addFromConfig(cls.configFile) 

2349 

2350 def setUp(self) -> None: 

2351 """Create a new butler root for each test.""" 

2352 self.root = makeTestTempDir(TESTDIR) 

2353 Butler.makeRepo(self.root, config=Config(self.configFile)) 

2354 

2355 def tearDown(self) -> None: 

2356 removeTestTempDir(self.root) 

2357 

2358 def test_fallback(self) -> None: 

2359 # Read the butler config and mess with the datastore section. 

2360 bad_config = Config(os.path.join(self.root, "butler.yaml")) 

2361 bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore" 

2362 

2363 with self.assertRaises(RuntimeError): 

2364 Butler(bad_config) 

2365 

2366 butler = Butler(bad_config, writeable=True, without_datastore=True) 

2367 self.assertIsInstance(butler._datastore, NullDatastore) 

2368 

2369 # Check that registry is working. 

2370 butler.registry.registerRun("MYRUN") 

2371 collections = butler.registry.queryCollections(...) 

2372 self.assertIn("MYRUN", set(collections)) 

2373 

2374 # Create a ref. 

2375 dimensions = butler.dimensions.extract([]) 

2376 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

2377 datasetTypeName = "metric" 

2378 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2379 butler.registry.registerDatasetType(datasetType) 

2380 ref = DatasetRef(datasetType, {}, run="MYRUN") 

2381 

2382 # Check that datastore will complain. 

2383 with self.assertRaises(FileNotFoundError): 

2384 butler.get(ref) 

2385 with self.assertRaises(FileNotFoundError): 

2386 butler.getURI(ref) 

2387 
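# Registry-only access in one line, as verified above: constructing a
# butler with without_datastore=True falls back to NullDatastore, so all
# registry operations work while get()/getURI() raise FileNotFoundError.
# Sketch only; hypothetical name.
def _sketch_registry_only_butler(config: Config) -> Butler:
    return Butler(config, writeable=True, without_datastore=True)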

2388 

2389def setup_module(module: types.ModuleType) -> None: 

2390 """Set up the module for pytest.""" 

2391 clean_environment() 

2392 

2393 

2394if __name__ == "__main__": 

2395 clean_environment() 

2396 unittest.main()