Coverage for tests/test_butler.py: 13%

1314 statements  

coverage.py v7.3.2, created at 2023-12-05 11:07 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""
from __future__ import annotations

import gc
import json
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock
import uuid
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, cast

try:
    import boto3
    import botocore
    from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
    from moto import mock_s3  # type: ignore[import]
except ImportError:
    boto3 = None

    def mock_s3(*args: Any, **kwargs: Any) -> Any:  # type: ignore[no-untyped-def]
        """No-op decorator to use when moto's mock_s3 cannot be imported."""
        return None


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    ButlerRepoIndex,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetExistence,
    DatasetRef,
    DatasetType,
    FileDataset,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.datastore import NullDatastore
from lsst.daf.butler.datastore.file_templates import FileTemplate, FileTemplateValidationError
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.direct_butler import DirectButler
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.registry.sql_registry import SqlRegistry
from lsst.daf.butler.repo_relocation import BUTLER_ROOT_TAG
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    import types

    from lsst.daf.butler import Datastore, DimensionGroup, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def clean_environment() -> None:
    """Remove external environment variables that affect the tests."""
    for k in (
        "DAF_BUTLER_REPOSITORY_INDEX",
        "S3_ENDPOINT_URL",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_SHARED_CREDENTIALS_FILE",
    ):
        os.environ.pop(k, None)


def makeExampleMetrics() -> MetricsExample:
    """Return example dataset suitable for tests."""
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )

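
# Illustrative sketch (added, not part of the original suite): the minimal
# put/get round trip that the tests below elaborate on. `repo_uri` is a
# hypothetical existing repository that already has a "test_metric" dataset
# type and the DummyCamComp dimension records registered.
def _example_round_trip(repo_uri: str) -> None:
    butler = Butler.from_config(repo_uri, run="example_run")
    metric = makeExampleMetrics()
    # Data ID values can be passed as keyword arguments alongside the
    # dataset type name.
    ref = butler.put(metric, "test_metric", instrument="DummyCamComp", visit=423)
    assert butler.get(ref) == metric
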

class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis that
    might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered in any other test
    cases.
    """

    def testSearchPath(self) -> None:
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")

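
# Illustrative sketch (added): the search-path override mechanism exercised
# in testSearchPath. Config files found under `overrides_dir` (a hypothetical
# directory) are merged over the values from the primary configuration file.
def _example_config_override(config_file: str, overrides_dir: str) -> ButlerConfig:
    return ButlerConfig(config_file, searchPaths=[overrides_dir])
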

class ButlerPutGetTests(TestCaseMixin):
    """Helper methods for running a suite of put/get tests from different
    butler configurations.
    """

    root: str
    default_run = "ingésτ😺"
    storageClassFactory: StorageClassFactory
    configFile: str
    tmpConfigFile: str

    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGroup, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(
        self,
        butler: Butler,
        datasetRef: DatasetRef,
        components: tuple[str, ...],
        reference: Any,
        collections: Any = None,
    ) -> None:
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)
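
    # Illustrative note (added): component dataset type names join the parent
    # name and the component with ".", so for a composite "metric" the call
    # datasetType.componentTypeName("summary") resolves to "metric.summary".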

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[DirectButler, DatasetType]:
        butler = Butler.from_config(self.tmpConfigFile, run=run)
        assert isinstance(butler, DirectButler), "Expect DirectButler in configuration"

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run})

        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> DirectButler:
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]
        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                kwargs: dict[str, Any] = {}
                if not isinstance(args[0], DatasetRef):  # type: ignore
                    kwargs["run"] = this_run
                ref = butler.put(metric, *args, **kwargs)
                self.assertIsInstance(ref, DatasetRef)

                # Test get with a resolved DatasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get by dataset type name and data ID
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                primary_uri, secondary_uris = butler.getURIs(ref)
                n_uris = len(secondary_uris)
                if primary_uri:
                    n_uris += 1

                # Can the artifacts themselves be retrieved?
                if not butler._datastore.isEphemeral:
                    # Create a temporary directory to hold the retrieved
                    # artifacts.
                    with tempfile.TemporaryDirectory(
                        prefix="butler-artifacts-", ignore_cleanup_errors=True
                    ) as artifact_root:
                        root_uri = ResourcePath(artifact_root, forceDirectory=True)

                        for preserve_path in (True, False):
                            destination = root_uri.join(f"{preserve_path}_{counter}/")
                            log = logging.getLogger("lsst.x")
                            log.warning("Using destination %s for args %s", destination, args)
                            # Use copy so that we can test that overwrite
                            # protection works (using "auto" for File URIs
                            # would use hard links and subsequent transfer
                            # would work because it knows they are the same
                            # file).
                            transferred = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, transfer="copy"
                            )
                            self.assertGreater(len(transferred), 0)
                            artifacts = list(ResourcePath.findFileResources([destination]))
                            self.assertEqual(set(transferred), set(artifacts))

                            for artifact in transferred:
                                path_in_destination = artifact.relative_to(destination)
                                self.assertIsNotNone(path_in_destination)
                                assert path_in_destination is not None

                                # When path is not preserved there should not
                                # be any path separators.
                                num_seps = path_in_destination.count("/")
                                if preserve_path:
                                    self.assertGreater(num_seps, 0)
                                else:
                                    self.assertEqual(num_seps, 0)

                            self.assertEqual(
                                len(artifacts),
                                n_uris,
                                "Comparing expected artifacts vs actual:"
                                f" {artifacts} vs {primary_uri} and {secondary_uris}",
                            )

                            if preserve_path:
                                # No need to run these twice
                                with self.assertRaises(ValueError):
                                    butler.retrieveArtifacts([ref], destination, transfer="move")

                                with self.assertRaises(FileExistsError):
                                    butler.retrieveArtifacts([ref], destination)

                                transferred_again = butler.retrieveArtifacts(
                                    [ref], destination, preserve_path=preserve_path, overwrite=True
                                )
                                self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                kwargs = {"collections": this_run}
                if isinstance(args[0], DatasetRef):
                    kwargs = {}  # Prevent warning from being issued.
                self.assertFalse(butler.exists(*args, **kwargs))
                # get() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.get_dataset(ref.id))

                # Do explicit registry removal since we know they are
                # empty
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Create DatasetRef for put using default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Check that getDeferred fails with standalone ref.
        with self.assertRaises(LookupError):
            butler.getDeferred(refIn)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        assert metric.data is not None  # for mypy
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.find_dataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a DatasetType that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises(FileNotFoundError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add the same ref again, so we can check that a duplicate put fails.
        ref = butler.put(metric, datasetType, dataId)

        # A repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with a resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # A repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # In the case of a resolved ref this write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present.

        return butler


    def testDeferredCollectionPassing(self) -> None:
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler.from_config(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # A second time it will be allowed but indicates a no-op.
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises a
        # CollectionError.
        self.assertFalse(butler.exists(datasetType, dataId))
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))

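
# Illustrative sketch (added): the collection model exercised by
# testDeferredCollectionPassing. A dataset always lives in exactly one RUN
# collection and may additionally be associated into TAGGED collections;
# `butler` and `ref` are assumed to come from a put() like the ones above.
def _example_tagged_association(butler: Butler, ref: DatasetRef) -> None:
    butler.registry.registerCollection("tagged", type=CollectionType.TAGGED)
    butler.registry.associate("tagged", [ref])
    # The dataset is now findable through either collection.
    assert butler.exists(ref.datasetType, ref.dataId, collections=["tagged"])
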

class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True
    validationCanFail: bool
    fullConfigKey: str | None
    registryStr: str | None
    datastoreName: list[str] | None
    datastoreStr: list[str]

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self) -> None:
        """Independent test of constructor."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler.from_config(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler.from_config(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler.from_config(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler.from_config(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler._datastore, butler2._datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"file://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler.from_config(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler.from_config("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler.from_config("not_there", writeable=False)
                    with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"):
                        Butler.from_config("bad_label")
                    with self.assertRaises(FileNotFoundError):
                        # Should ignore aliases.
                        Butler.from_config(ResourcePath("label", forceAbsolute=False))
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(
                        Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False)
                    )
                    self.assertIn("not known to", str(cm.exception))
                    # Should report no failure.
                    self.assertEqual(ButlerRepoIndex.get_failure_reason(), "")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with empty configuration.
                butler_index = Config()
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"):
                        Butler.from_config("label")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with bad contents.
                with open(temp_file.ospath, "w") as fh:
                    print("'", file=fh)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"):
                        Butler.from_config("label")
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())

            with self.assertRaisesRegex(FileNotFoundError, "index file not found"):
                Butler.from_config("label")
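
        # Illustrative note (added): the index file pointed at by
        # DAF_BUTLER_REPOSITORY_INDEX is a small YAML or JSON mapping of
        # labels to configuration URIs, for example
        #
        #     label: /path/to/repo/butler.yaml
        #     bad_label: file://bucket/not_real.yaml
        #
        # Butler.from_config("label") resolves the alias before loading the
        # configuration.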

        # Check that we can create Butler when the alias file is not found.
        butler = Butler.from_config(self.tmpConfigFile, writeable=False)
        self.assertIsInstance(butler, Butler)
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"):
            # No aliases registered.
            Butler.from_config("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler._datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler._datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )
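
    # Illustrative note (added): storage class overrides convert on read
    # without touching the stored artifact; for example
    #
    #     model = butler.get(ref, storageClass="MetricsConversion")
    #
    # returns the override's python type while still comparing equal to the
    # originally stored object.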

    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.get_dataset_type(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the wrong dataset type definition, this time
        # passing the DatasetType and data ID rather than a resolved ref.
        # The behavior should be consistent and return the python type of
        # the given DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self) -> None:
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            # For first ingest use copy.
            butler.ingest(*datasets, transfer="copy", record_validation_info=False)

            # Now try to ingest again in "execution butler" mode where
            # the registry entries exist but the datastore does not have
            # the files. We also need to strip the dimension records to ensure
            # that they will be re-added by the ingest.
            ref = datasets[0].refs[0]
            datasets[0].refs = [
                cast(
                    DatasetRef,
                    butler.find_dataset(ref.datasetType, data_id=ref.dataId, collections=ref.run),
                )
                for ref in datasets[0].refs
            ]
            all_refs = []
            for dataset in datasets:
                refs = []
                for ref in dataset.refs:
                    # Create a dict from the dataId to drop the records.
                    new_data_id = dict(ref.dataId.required)
                    new_ref = butler.find_dataset(ref.datasetType, new_data_id, collections=ref.run)
                    assert new_ref is not None
                    self.assertFalse(new_ref.dataId.hasRecords())
                    refs.append(new_ref)
                dataset.refs = refs
                all_refs.extend(dataset.refs)
            butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False)

            # Use move mode to test that the file is deleted. Also
            # disable recording of file size.
            butler.ingest(*datasets, transfer="move", record_validation_info=False)

            # Check that every ref now has records.
            for dataset in datasets:
                for ref in dataset.refs:
                    self.assertTrue(ref.dataId.hasRecords())

            # Ensure that the file has disappeared.
            self.assertFalse(tempFile.exists())

            # Check that the datastore recorded no file size.
            # Not all datastores can support this.
            try:
                infos = butler._datastore.getStoredItemsInfo(datasets[0].refs[0])  # type: ignore[attr-defined]
                self.assertEqual(infos[0].file_size, -1)
            except AttributeError:
                pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.exists(datasetTypeName, dataId1))
        self.assertTrue(butler.exists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets
        datasets = []
        butler.ingest(*datasets)
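
    # Illustrative note (added): the ingest pattern above in miniature. A
    # FileDataset pairs an existing file with one or more resolved
    # DatasetRefs, and the transfer mode controls the fate of the source
    # file, e.g.
    #
    #     refs = [DatasetRef(datasetType, dataId, run="some_run")]
    #     butler.ingest(FileDataset(path="metric.yaml", refs=refs),
    #                   transfer="copy")  # "move" deletes the source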

    def testPickle(self) -> None:
        """Test pickle support."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        assert isinstance(butler, DirectButler), "Expect DirectButler in configuration"
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self) -> None:
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.dimensions.conform(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry, entries are not created
        # for its components, but querying can still return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self) -> None:
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get with a resolved DatasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get by dataset type name and data ID
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if the Dataset entry is missing
        self.assertIsNone(butler.find_dataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)
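
    # Illustrative note (added): butler.transaction() is a context manager;
    # an exception raised inside the block rolls back registry inserts and
    # datastore writes together, e.g.
    #
    #     with butler.transaction():
    #         butler.put(metric, datasetTypeName, dataId)
    #         raise RuntimeError("nothing above survives the rollback")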

    def testMakeRepo(self) -> None:
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler.from_config(butlerConfig)
        assert isinstance(butler1, DirectButler), "Expect DirectButler in configuration"
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler.from_config(butlerConfig)
        assert isinstance(butler2, DirectButler), "Expect DirectButler in configuration"
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler.from_config(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self) -> None:
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler._datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self) -> None:
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.dimensions.conform(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)

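
# Illustrative note (added): testButlerRewriteDataId relies on the registry
# expanding an alternate key ({"seq_num", "day_obs"}) into the primary
# exposure ID via dimension records. A minimal sketch, assuming the records
# inserted in that test:
#
#     dataId = {"instrument": "DummyCamComp", "seq_num": 3,
#               "day_obs": 20210530, "physical_filter": "d-r"}
#     ref = butler.put(metric, "random_data", dataId=dataId)
#     assert ref.dataId["exposure"] == 3
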

class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool:
        """Check if a file exists at a given path (relative to root).

        The test testPutTemplates verifies the actual physical existence of
        the files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)
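
    # Illustrative note (added): FileTemplate placeholders are drawn from the
    # data ID, from dimension record metadata ("{visit.name}"), and from the
    # ref itself ("{id}"); a trailing "?" in the format spec marks a field
    # optional, which is why "{visit.namex:?}" above degrades gracefully
    # while "{visit.namex}" raises KeyError.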

1329 def testImportExport(self) -> None: 

1330 # Run put/get tests just to create and populate a repo. 

1331 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1332 self.runImportExportTest(storageClass) 

1333 

1334 @unittest.expectedFailure 

1335 def testImportExportVirtualComposite(self) -> None: 

1336 # Run put/get tests just to create and populate a repo. 

1337 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

1338 self.runImportExportTest(storageClass) 

1339 

1340 def runImportExportTest(self, storageClass: StorageClass) -> None: 

1341 """Test exporting and importing. 

1342 

1343 This test does an export to a temp directory and an import back 

1344 into a new temp directory repo. It does not assume a posix datastore. 

1345 """ 

1346 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1347 

1348 # Test that we must have a file extension. 

1349 with self.assertRaises(ValueError): 

1350 with exportButler.export(filename="dump", directory=".") as export: 

1351 pass 

1352 

1353 # Test that unknown format is not allowed. 

1354 with self.assertRaises(ValueError): 

1355 with exportButler.export(filename="dump.fits", directory=".") as export: 

1356 pass 

1357 

1358 # Test that the repo actually has at least one dataset. 

1359 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1360 self.assertGreater(len(datasets), 0) 

1361 # Add a DimensionRecord that's unused by those datasets. 

1362 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")} 

1363 exportButler.registry.insertDimensionData("skymap", skymapRecord) 

1364 # Export and then import datasets. 

1365 with safeTestTempDir(TESTDIR) as exportDir: 

1366 exportFile = os.path.join(exportDir, "exports.yaml") 

1367 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export: 

1368 export.saveDatasets(datasets) 

1369 # Export the same datasets again. This should quietly do 

1370 # nothing because of internal deduplication, and it shouldn't 

1371 # complain about being asked to export the "htm7" elements even 

1372 # though there aren't any in these datasets or in the database. 

1373 export.saveDatasets(datasets, elements=["htm7"]) 

1374 # Save one of the data IDs again; this should be harmless 

1375 # because of internal deduplication. 

1376 export.saveDataIds([datasets[0].dataId]) 

1377 # Save some dimension records directly. 

1378 export.saveDimensionData("skymap", [skymapRecord]) 

1379 self.assertTrue(os.path.exists(exportFile)) 

1380 with safeTestTempDir(TESTDIR) as importDir: 

1381 # We always want this to be a local posix butler 

1382 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml"))) 

1383 # Calling script.butlerImport tests the implementation of the 

1384 # butler command line interface "import" subcommand. Functions 

1385 # in the script folder are generally considered protected and 

1386                # should not be used as public API. 

1387 with open(exportFile) as f: 

1388 script.butlerImport( 

1389 importDir, 

1390 export_file=f, 

1391 directory=exportDir, 

1392 transfer="auto", 

1393 skip_dimensions=None, 

1394 ) 

1395 importButler = Butler.from_config(importDir, run=self.default_run) 

1396 for ref in datasets: 

1397 with self.subTest(ref=ref): 

1398 # Test for existence by passing in the DatasetType and 

1399 # data ID separately, to avoid lookup by dataset_id. 

1400 self.assertTrue(importButler.exists(ref.datasetType, ref.dataId)) 

1401 self.assertEqual( 

1402 list(importButler.registry.queryDimensionRecords("skymap")), 

1403 [importButler.dimensions["skymap"].RecordClass(**skymapRecord)], 

1404 ) 

1405 
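
Condensed, the round trip above is (the directory paths here are placeholders for the temp dirs the test creates):

    with exportButler.export(filename="/tmp/exp/exports.yaml", directory="/tmp/exp",
                             transfer="auto") as export:
        export.saveDatasets(datasets)
        export.saveDimensionData("skymap", [skymapRecord])

    Butler.makeRepo("/tmp/imp", config=Config("config/basic/butler.yaml"))
    with open("/tmp/exp/exports.yaml") as f:
        script.butlerImport("/tmp/imp", export_file=f, directory="/tmp/exp",
                            transfer="auto", skip_dimensions=None)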

1406 def testRemoveRuns(self) -> None: 

1407 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1408 butler = Butler.from_config(self.tmpConfigFile, writeable=True) 

1409 # Load registry data with dimensions to hang datasets off of. 

1410 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1411 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1412        # Add some RUN-type collections. 

1413 run1 = "run1" 

1414 butler.registry.registerRun(run1) 

1415 run2 = "run2" 

1416 butler.registry.registerRun(run2) 

1417        # Put a dataset in each run. 

1418 metric = makeExampleMetrics() 

1419 dimensions = butler.dimensions.conform(["instrument", "physical_filter"]) 

1420 datasetType = self.addDatasetType( 

1421 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1422 ) 

1423 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1424 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1425 uri1 = butler.getURI(ref1) 

1426 uri2 = butler.getURI(ref2) 

1427 

1428 with self.assertRaises(OrphanedRecordError): 

1429 butler.registry.removeDatasetType(datasetType.name) 

1430 

1431 # Remove from both runs with different values for unstore. 

1432 butler.removeRuns([run1], unstore=True) 

1433 butler.removeRuns([run2], unstore=False) 

1434 # Should be nothing in registry for either one, and datastore should 

1435 # not think either exists. 

1436 with self.assertRaises(MissingCollectionError): 

1437 butler.registry.getCollectionType(run1) 

1438 with self.assertRaises(MissingCollectionError): 

1439 butler.registry.getCollectionType(run2) 

1440 self.assertFalse(butler.stored(ref1)) 

1441 self.assertFalse(butler.stored(ref2)) 

1442 # The ref we unstored should be gone according to the URI, but the 

1443 # one we forgot should still be around. 

1444 self.assertFalse(uri1.exists()) 

1445 self.assertTrue(uri2.exists()) 

1446 

1447 # Now that the collections have been pruned we can remove the 

1448 # dataset type 

1449 butler.registry.removeDatasetType(datasetType.name) 

1450 

1451 with self.assertLogs("lsst.daf.butler.registry", "INFO") as cm: 

1452 butler.registry.removeDatasetType(("test*", "test*")) 

1453 self.assertIn("not defined", "\n".join(cm.output)) 

1454 
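
For reference, the two removal modes exercised above differ only in what happens to the file artifacts (the names below are illustrative):

    butler.removeRuns(["run1"], unstore=True)    # drop the run and delete its artifacts
    butler.removeRuns(["run2"], unstore=False)   # drop the run, leave artifacts on disk
    butler.registry.removeDatasetType(("prune_*",))  # wildcards allowed once no datasets remain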

1455 

1456class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1457 """PosixDatastore specialization of a butler""" 

1458 

1459 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1460 fullConfigKey: str | None = ".datastore.formatters" 

1461 validationCanFail = True 

1462 datastoreStr = ["/tmp"] 

1463 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1464 registryStr = "/gen3.sqlite3" 

1465 

1466 def testPathConstructor(self) -> None: 

1467 """Independent test of constructor using PathLike.""" 

1468 butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) 

1469 self.assertIsInstance(butler, Butler) 

1470 

1471 # And again with a Path object with the butler yaml 

1472 path = pathlib.Path(self.tmpConfigFile) 

1473 butler = Butler.from_config(path, writeable=False) 

1474 self.assertIsInstance(butler, Butler) 

1475 

1476 # And again with a Path object without the butler yaml 

1477 # (making sure we skip it if the tmp config doesn't end 

1478 # in butler.yaml -- which is the case for a subclass) 

1479 if self.tmpConfigFile.endswith("butler.yaml"): 

1480 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1481 butler = Butler.from_config(path, writeable=False) 

1482 self.assertIsInstance(butler, Butler) 

1483 

1484 def testExportTransferCopy(self) -> None: 

1485 """Test local export using all transfer modes""" 

1486 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1487 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1488 # Test that the repo actually has at least one dataset. 

1489 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1490 self.assertGreater(len(datasets), 0) 

1491 uris = [exportButler.getURI(d) for d in datasets] 

1492 assert isinstance(exportButler._datastore, FileDatastore) 

1493 datastoreRoot = exportButler.get_datastore_roots()[exportButler.get_datastore_names()[0]] 

1494 

1495 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1496 

1497 for path in pathsInStore: 

1498 # Assume local file system 

1499 assert path is not None 

1500 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1501 

1502 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1503 with safeTestTempDir(TESTDIR) as exportDir: 

1504 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1505 export.saveDatasets(datasets) 

1506 for path in pathsInStore: 

1507 assert path is not None 

1508 self.assertTrue( 

1509 self.checkFileExists(exportDir, path), 

1510 f"Check that mode {transfer} exported files", 

1511 ) 

1512 

1513 def testPruneDatasets(self) -> None: 

1514 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1515 butler = Butler.from_config(self.tmpConfigFile, writeable=True) 

1516 assert isinstance(butler._datastore, FileDatastore) 

1517 # Load registry data with dimensions to hang datasets off of. 

1518 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1519 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1520 # Add some RUN-type collections. 

1521 run1 = "run1" 

1522 butler.registry.registerRun(run1) 

1523 run2 = "run2" 

1524 butler.registry.registerRun(run2) 

1525        # Put some datasets. ref1 and ref2 have the same data ID, and are in 

1526 # different runs. ref3 has a different data ID. 

1527 metric = makeExampleMetrics() 

1528 dimensions = butler.dimensions.conform(["instrument", "physical_filter"]) 

1529 datasetType = self.addDatasetType( 

1530 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1531 ) 

1532 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1533 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1534 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1535 

1536 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1537 for ref, stored in many_stored.items(): 

1538 self.assertTrue(stored, f"Ref {ref} should be stored") 

1539 

1540 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1541 for ref, exists in many_exists.items(): 

1542 self.assertTrue(exists, f"Checking ref {ref} exists.") 

1543 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored") 

1544 

1545 # Simple prune. 

1546 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1547 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1)) 

1548 

1549 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1550 for ref, stored in many_stored.items(): 

1551 self.assertFalse(stored, f"Ref {ref} should not be stored") 

1552 

1553 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1554 for ref, exists in many_exists.items(): 

1555 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored") 

1556 

1557 # Put data back. 

1558 ref1_new = butler.put(metric, ref1) 

1559 self.assertEqual(ref1_new, ref1) # Reuses original ID. 

1560 ref2 = butler.put(metric, ref2) 

1561 

1562 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1563 self.assertTrue(many_stored[ref1]) 

1564 self.assertTrue(many_stored[ref2]) 

1565 self.assertFalse(many_stored[ref3]) 

1566 

1567 ref3 = butler.put(metric, ref3) 

1568 

1569 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1570 for ref, exists in many_exists.items(): 

1571            self.assertTrue(exists, f"Ref {ref} should be stored") 

1572 

1573 # Clear out the datasets from registry and start again. 

1574 refs = [ref1, ref2, ref3] 

1575 butler.pruneDatasets(refs, purge=True, unstore=True) 

1576 for ref in refs: 

1577 butler.put(metric, ref) 

1578 

1579 # Confirm we can retrieve deferred. 

1580 dref1 = butler.getDeferred(ref1) # known and exists 

1581 metric1 = dref1.get() 

1582 self.assertEqual(metric1, metric) 

1583 

1584 # Test different forms of file availability. 

1585 # Need to be in a state where: 

1586 # - one ref just has registry record. 

1587 # - one ref has a missing file but a datastore record. 

1588 # - one ref has a missing datastore record but file is there. 

1589 # - one ref does not exist anywhere. 

1590 # Do not need to test a ref that has everything since that is tested 

1591 # above. 

1592 ref0 = DatasetRef( 

1593 datasetType, 

1594 DataCoordinate.standardize( 

1595 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions 

1596 ), 

1597 run=run1, 

1598 ) 

1599 

1600 # Delete from datastore and retain in Registry. 

1601 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False) 

1602 

1603 # File has been removed. 

1604 uri2 = butler.getURI(ref2) 

1605 uri2.remove() 

1606 

1607 # Datastore has lost track. 

1608 butler._datastore.forget([ref3]) 

1609 

1610 # First test with a standard butler. 

1611 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1612 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1613 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1614 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1615 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED) 

1616 

1617 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False) 

1618 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1619 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1620 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN) 

1621 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1622 self.assertTrue(exists_many[ref2]) 

1623 

1624 # Check that per-ref query gives the same answer as many query. 

1625 for ref, exists in exists_many.items(): 

1626 self.assertEqual(butler.exists(ref, full_check=False), exists) 

1627 

1628        # getDeferred checks for existence before it will hand back a 

1629        # deferred handle. 

1630 with self.assertRaises(LookupError): 

1631 butler.getDeferred(ref3) # not known, file exists 

1632 dref2 = butler.getDeferred(ref2) # known but file missing 

1633 with self.assertRaises(FileNotFoundError): 

1634 dref2.get() 

1635 

1636 # Test again with a trusting butler. 

1637 butler._datastore.trustGetRequest = True 

1638 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1639 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1640 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1641 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1642 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT) 

1643 

1644 # When trusting we can get a deferred dataset handle that is not 

1645 # known but does exist. 

1646 dref3 = butler.getDeferred(ref3) 

1647 metric3 = dref3.get() 

1648 self.assertEqual(metric3, metric) 

1649 

1650 # Check that per-ref query gives the same answer as many query. 

1651 for ref, exists in exists_many.items(): 

1652 self.assertEqual(butler.exists(ref, full_check=True), exists) 

1653 

1654 # Create a ref that surprisingly has the UUID of an existing ref 

1655 # but is not the same. 

1656 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id) 

1657 with self.assertRaises(ValueError): 

1658 butler.exists(ref_bad) 

1659 

1660 # Create a ref that has a compatible storage class. 

1661 ref_compat = ref2.overrideStorageClass("StructuredDataDict") 

1662 exists = butler.exists(ref_compat) 

1663 self.assertEqual(exists, exists_many[ref2]) 

1664 

1665 # Remove everything and start from scratch. 

1666 butler._datastore.trustGetRequest = False 

1667 butler.pruneDatasets(refs, purge=True, unstore=True) 

1668 for ref in refs: 

1669 butler.put(metric, ref) 

1670 

1671 # These tests mess directly with the trash table and can leave the 

1672 # datastore in an odd state. Do them at the end. 

1673 # Check that in normal mode, deleting the record will lead to 

1674 # trash not touching the file. 

1675 uri1 = butler.getURI(ref1) 

1676 butler._datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1677 butler._datastore.forget([ref1]) 

1678 butler._datastore.trash(ref1) 

1679 butler._datastore.emptyTrash() 

1680 self.assertTrue(uri1.exists()) 

1681 uri1.remove() # Clean it up. 

1682 

1683 # Simulate execution butler setup by deleting the datastore 

1684 # record but keeping the file around and trusting. 

1685 butler._datastore.trustGetRequest = True 

1686 uris = butler.get_many_uris([ref2, ref3]) 

1687 uri2 = uris[ref2].primaryURI 

1688 uri3 = uris[ref3].primaryURI 

1689 self.assertTrue(uri2.exists()) 

1690 self.assertTrue(uri3.exists()) 

1691 

1692 # Remove the datastore record. 

1693 butler._datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1694 butler._datastore.forget([ref2]) 

1695 self.assertTrue(uri2.exists()) 

1696 butler._datastore.trash([ref2, ref3]) 

1697 # Immediate removal for ref2 file 

1698 self.assertFalse(uri2.exists()) 

1699 # But ref3 has to wait for the empty. 

1700 self.assertTrue(uri3.exists()) 

1701 butler._datastore.emptyTrash() 

1702 self.assertFalse(uri3.exists()) 

1703 

1704 # Clear out the datasets from registry. 

1705 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1706 
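
The flag arithmetic asserted above can be summarised on its own; the relationships in this sketch are inferred from the assertions in this test rather than quoted from the API documentation:

    from lsst.daf.butler import DatasetExistence

    state = DatasetExistence.RECORDED | DatasetExistence.DATASTORE
    assert state                              # any set bit makes the flag truthy
    assert not DatasetExistence.UNRECOGNIZED  # the empty flag is falsey
    # full_check=False adds an "assumed" bit instead of checking the artifact:
    assert state | DatasetExistence._ASSUMED == DatasetExistence.KNOWN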

1707 def testPytypeCoercion(self) -> None: 

1708 """Test python type coercion on Butler.get and put.""" 

1709 # Store some data with the normal example storage class. 

1710 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1711 datasetTypeName = "test_metric" 

1712 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1713 

1714 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1715 metric = butler.get(datasetTypeName, dataId=dataId) 

1716 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1717 

1718 datasetType_ori = butler.get_dataset_type(datasetTypeName) 

1719 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1720 

1721 # Now need to hack the registry dataset type definition. 

1722 # There is no API for this. 

1723 assert isinstance(butler._registry, SqlRegistry) 

1724 manager = butler._registry._managers.datasets 

1725 assert hasattr(manager, "_db") and hasattr(manager, "_static") 

1726 manager._db.update( 

1727 manager._static.dataset_type, 

1728 {"name": datasetTypeName}, 

1729 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1730 ) 

1731 

1732 # Force reset of dataset type cache 

1733 butler.registry.refresh() 

1734 

1735 datasetType_new = butler.get_dataset_type(datasetTypeName) 

1736 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1737 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1738 

1739 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1740 self.assertNotEqual(type(metric_model), type(metric)) 

1741 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1742 

1743 # Put the model and read it back to show that everything now 

1744 # works as normal. 

1745 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1746 metric_model_new = butler.get(metric_ref) 

1747 self.assertEqual(metric_model_new, metric_model) 

1748 

1749 # Hack the storage class again to something that will fail on the 

1750 # get with no conversion class. 

1751 manager._db.update( 

1752 manager._static.dataset_type, 

1753 {"name": datasetTypeName}, 

1754 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1755 ) 

1756 butler.registry.refresh() 

1757 

1758 with self.assertRaises(ValueError): 

1759 butler.get(datasetTypeName, dataId=dataId) 

1760 
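
The same conversion machinery is reachable without registry surgery: a compatible storage class can be requested at read time. A sketch, assuming the converters registered by the test configuration:

    # Ask for the model view of a dataset stored as plain MetricsExample.
    model = butler.get("test_metric", dataId=dataId,
                       storageClass="StructuredDataNoComponentsModel")
    # Equivalently, override the storage class on an existing ref first.
    model = butler.get(ref.overrideStorageClass("StructuredDataNoComponentsModel"))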

1761 

1762@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1763class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1764 """PosixDatastore specialization of a butler using Postgres""" 

1765 

1766 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1767 fullConfigKey = ".datastore.formatters" 

1768 validationCanFail = True 

1769 datastoreStr = ["/tmp"] 

1770 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1771 registryStr = "PostgreSQL@test" 

1772 postgresql: Any 

1773 

1774 @staticmethod 

1775 def _handler(postgresql: Any) -> None: 

1776 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1777 with engine.begin() as connection: 

1778 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1779 

1780 @classmethod 

1781 def setUpClass(cls) -> None: 

1782 # Create the postgres test server. 

1783 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1784 cache_initialized_db=True, on_initialized=cls._handler 

1785 ) 

1786 super().setUpClass() 

1787 

1788 @classmethod 

1789 def tearDownClass(cls) -> None: 

1790 # Clean up any lingering SQLAlchemy engines/connections 

1791 # so they're closed before we shut down the server. 

1792 gc.collect() 

1793 cls.postgresql.clear_cache() 

1794 super().tearDownClass() 

1795 

1796 def setUp(self) -> None: 

1797 self.server = self.postgresql() 

1798 

1799 # Need to add a registry section to the config. 

1800 self._temp_config = False 

1801 config = Config(self.configFile) 

1802 config["registry", "db"] = self.server.url() 

1803 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1804 config.dump(fh) 

1805 self.configFile = fh.name 

1806 self._temp_config = True 

1807 super().setUp() 

1808 

1809 def tearDown(self) -> None: 

1810 self.server.stop() 

1811 if self._temp_config and os.path.exists(self.configFile): 

1812 os.remove(self.configFile) 

1813 super().tearDown() 

1814 

1815 def testMakeRepo(self) -> None: 

1816 # The base class test assumes that it's using sqlite and assumes 

1817 # the config file is acceptable to sqlite. 

1818 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1819 
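
In isolation, the server lifecycle this class wires into setUpClass/setUp/tearDown looks like this (the btree_gist extension created by _handler is needed by the registry's exclusion constraints):

    factory = testing.postgresql.PostgresqlFactory(
        cache_initialized_db=True, on_initialized=_handler  # the CREATE EXTENSION hook above
    )
    server = factory()                       # throwaway PostgreSQL instance
    config = Config("config/basic/butler.yaml")
    config["registry", "db"] = server.url()  # point the registry at it
    server.stop()
    factory.clear_cache()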

1820 

1821class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1822 """InMemoryDatastore specialization of a butler""" 

1823 

1824 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1825 fullConfigKey = None 

1826 useTempRoot = False 

1827 validationCanFail = False 

1828 datastoreStr = ["datastore='InMemory"] 

1829 datastoreName = ["InMemoryDatastore@"] 

1830 registryStr = "/gen3.sqlite3" 

1831 

1832 def testIngest(self) -> None: 

1833        pass  # Ingest of external files does not apply to an in-memory datastore. 

1834 

1835 

1836class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1837 """PosixDatastore specialization""" 

1838 

1839 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1840 fullConfigKey = ".datastore.datastores.1.formatters" 

1841 validationCanFail = True 

1842 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1843 datastoreName = [ 

1844 "InMemoryDatastore@", 

1845 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1846 "SecondDatastore", 

1847 ] 

1848 registryStr = "/gen3.sqlite3" 

1849 

1850 

1851class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1852 """Test that a yaml file in one location can refer to a root in another.""" 

1853 

1854 datastoreStr = ["dir1"] 

1855 # Disable the makeRepo test since we are deliberately not using 

1856 # butler.yaml as the config name. 

1857 fullConfigKey = None 

1858 

1859 def setUp(self) -> None: 

1860 self.root = makeTestTempDir(TESTDIR) 

1861 

1862 # Make a new repository in one place 

1863 self.dir1 = os.path.join(self.root, "dir1") 

1864 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1865 

1866 # Move the yaml file to a different place and add a "root" 

1867 self.dir2 = os.path.join(self.root, "dir2") 

1868 os.makedirs(self.dir2, exist_ok=True) 

1869 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1870 config = Config(configFile1) 

1871 config["root"] = self.dir1 

1872 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1873 config.dumpToUri(configFile2) 

1874 os.remove(configFile1) 

1875 self.tmpConfigFile = configFile2 

1876 

1877 def testFileLocations(self) -> None: 

1878 self.assertNotEqual(self.dir1, self.dir2) 

1879 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1880 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1881 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1882 

1883 

1884class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1885 """Test that a config file created by makeRepo outside of repo works.""" 

1886 

1887 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1888 

1889 def setUp(self) -> None: 

1890 self.root = makeTestTempDir(TESTDIR) 

1891 self.root2 = makeTestTempDir(TESTDIR) 

1892 

1893 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1894 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1895 

1896 def tearDown(self) -> None: 

1897 if os.path.exists(self.root2): 

1898 shutil.rmtree(self.root2, ignore_errors=True) 

1899 super().tearDown() 

1900 

1901 def testConfigExistence(self) -> None: 

1902 c = Config(self.tmpConfigFile) 

1903 uri_config = ResourcePath(c["root"]) 

1904 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1905 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1906 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1907 

1908 def testPutGet(self) -> None: 

1909 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1910 self.runPutGetTest(storageClass, "test_metric") 

1911 

1912 

1913class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1914 """Test that a config file created by makeRepo outside of repo works.""" 

1915 

1916 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1917 

1918 def setUp(self) -> None: 

1919 self.root = makeTestTempDir(TESTDIR) 

1920 self.root2 = makeTestTempDir(TESTDIR) 

1921 

1922 self.tmpConfigFile = self.root2 

1923 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1924 

1925 def testConfigExistence(self) -> None: 

1926 # Append the yaml file else Config constructor does not know the file 

1927 # type. 

1928 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1929 super().testConfigExistence() 

1930 

1931 

1932class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1933 """Test that a config file created by makeRepo outside of repo works.""" 

1934 

1935 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1936 

1937 def setUp(self) -> None: 

1938 self.root = makeTestTempDir(TESTDIR) 

1939 self.root2 = makeTestTempDir(TESTDIR) 

1940 

1941 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1942 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1943 

1944 

1945@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1946class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1947 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1948    a local SQLite-backed SqlRegistry. 

1949 """ 

1950 

1951 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1952 fullConfigKey = None 

1953 validationCanFail = True 

1954 

1955 bucketName = "anybucketname" 

1956 """Name of the Bucket that will be used in the tests. The name is read from 

1957 the config file used with the tests during set-up. 

1958 """ 

1959 

1960 root = "butlerRoot/" 

1961 """Root repository directory expected to be used in case useTempRoot=False. 

1962 Otherwise the root is set to a 20 characters long randomly generated string 

1963 during set-up. 

1964 """ 

1965 

1966 datastoreStr = [f"datastore={root}"] 

1967 """Contains all expected root locations in a format expected to be 

1968 returned by Butler stringification. 

1969 """ 

1970 

1971 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1972 """The expected format of the S3 Datastore string.""" 

1973 

1974 registryStr = "/gen3.sqlite3" 

1975 """Expected format of the Registry string.""" 

1976 

1977 mock_s3 = mock_s3() 

1978 """The mocked s3 interface from moto.""" 

1979 

1980 def genRoot(self) -> str: 

1981 """Return a random string of len 20 to serve as a root 

1982 name for the temporary bucket repo. 

1983 

1984 This is equivalent to tempfile.mkdtemp as this is what self.root 

1985 becomes when useTempRoot is True. 

1986 """ 

1987 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1988 return rndstr + "/" 

1989 

1990 def setUp(self) -> None: 

1991 config = Config(self.configFile) 

1992 uri = ResourcePath(config[".datastore.datastore.root"]) 

1993 self.bucketName = uri.netloc 

1994 

1995 # Enable S3 mocking of tests. 

1996 self.mock_s3.start() 

1997 

1998 # set up some fake credentials if they do not exist 

1999 self.usingDummyCredentials = setAwsEnvCredentials() 

2000 

2001 if self.useTempRoot: 

2002 self.root = self.genRoot() 

2003 rooturi = f"s3://{self.bucketName}/{self.root}" 

2004 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

2005 

2006 # need local folder to store registry database 

2007 self.reg_dir = makeTestTempDir(TESTDIR) 

2008 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

2009 

2010        # Moto needs to know that we expect the bucket to exist 

2011        # (the name used to be the class attribute bucketName). 

2012 s3 = boto3.resource("s3") 

2013 s3.create_bucket(Bucket=self.bucketName) 

2014 

2015 self.datastoreStr = [f"datastore='{rooturi}'"] 

2016 self.datastoreName = [f"FileDatastore@{rooturi}"] 

2017 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

2018 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

2019 

2020 def tearDown(self) -> None: 

2021 s3 = boto3.resource("s3") 

2022 bucket = s3.Bucket(self.bucketName) 

2023 try: 

2024 bucket.objects.all().delete() 

2025 except botocore.exceptions.ClientError as e: 

2026 if e.response["Error"]["Code"] == "404": 

2027 # the key was not reachable - pass 

2028 pass 

2029 else: 

2030 raise 

2031 

2032 bucket = s3.Bucket(self.bucketName) 

2033 bucket.delete() 

2034 

2035 # Stop the S3 mock. 

2036 self.mock_s3.stop() 

2037 

2038 # unset any potentially set dummy credentials 

2039 if self.usingDummyCredentials: 

2040 unsetAwsEnvCredentials() 

2041 

2042 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

2043 shutil.rmtree(self.reg_dir, ignore_errors=True) 

2044 

2045 if self.useTempRoot and os.path.exists(self.root): 

2046 shutil.rmtree(self.root, ignore_errors=True) 

2047 

2048 super().tearDown() 

2049 
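
Stripped of the butler specifics, the moto pattern this class relies on is start/stop bracketing plus creating the bucket up front (the bucket name is illustrative):

    mock = mock_s3()
    mock.start()
    had_to_fake = setAwsEnvCredentials()    # dummy credentials if none are set
    boto3.resource("s3").create_bucket(Bucket="anybucketname")
    # ... exercise code that reads and writes s3://anybucketname/ ...
    mock.stop()
    if had_to_fake:
        unsetAwsEnvCredentials()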

2050 

2051class PosixDatastoreTransfers(unittest.TestCase): 

2052 """Test data transfers between butlers. 

2053 

2054 Test for different managers. UUID to UUID and integer to integer are 

2055 tested. UUID to integer is not supported since we do not currently 

2056 want to allow that. Integer to UUID is supported with the caveat 

2057 that UUID4 will be generated and this will be incorrect for raw 

2058 dataset types. The test ignores that. 

2059 """ 

2060 

2061 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2062 storageClassFactory: StorageClassFactory 

2063 

2064 @classmethod 

2065 def setUpClass(cls) -> None: 

2066 cls.storageClassFactory = StorageClassFactory() 

2067 cls.storageClassFactory.addFromConfig(cls.configFile) 

2068 

2069 def setUp(self) -> None: 

2070 self.root = makeTestTempDir(TESTDIR) 

2071 self.config = Config(self.configFile) 

2072 

2073 def tearDown(self) -> None: 

2074 removeTestTempDir(self.root) 

2075 

2076 def create_butler(self, manager: str, label: str) -> Butler: 

2077 config = Config(self.configFile) 

2078 config["registry", "managers", "datasets"] = manager 

2079 return Butler.from_config( 

2080 Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True 

2081 ) 

2082 

2083 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None: 

2084 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID" 

2085 if manager1 is None: 

2086 manager1 = default 

2087 if manager2 is None: 

2088 manager2 = default 

2089 self.source_butler = self.create_butler(manager1, "1") 

2090 self.target_butler = self.create_butler(manager2, "2") 

2091 

2092 def testTransferUuidToUuid(self) -> None: 

2093 self.create_butlers() 

2094 self.assertButlerTransfers() 

2095 

2096 def _enable_trust(self, datastore: Datastore) -> None: 

2097 datastores = getattr(datastore, "datastores", [datastore]) 

2098 for this_datastore in datastores: 

2099 if hasattr(this_datastore, "trustGetRequest"): 

2100 this_datastore.trustGetRequest = True 

2101 

2102 def testTransferMissing(self) -> None: 

2103 """Test transfers where datastore records are missing. 

2104 

2105 This is how execution butler works. 

2106 """ 

2107 self.create_butlers() 

2108 

2109 # Configure the source butler to allow trust. 

2110 self._enable_trust(self.source_butler._datastore) 

2111 

2112 self.assertButlerTransfers(purge=True) 

2113 

2114 def testTransferMissingDisassembly(self) -> None: 

2115 """Test transfers where datastore records are missing. 

2116 

2117 This is how execution butler works. 

2118 """ 

2119 self.create_butlers() 

2120 

2121 # Configure the source butler to allow trust. 

2122 self._enable_trust(self.source_butler._datastore) 

2123 

2124 # Test disassembly. 

2125 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2126 

2127 def testAbsoluteURITransferDirect(self) -> None: 

2128 """Test transfer using an absolute URI.""" 

2129 self._absolute_transfer("auto") 

2130 

2131 def testAbsoluteURITransferCopy(self) -> None: 

2132 """Test transfer using an absolute URI.""" 

2133 self._absolute_transfer("copy") 

2134 

2135 def _absolute_transfer(self, transfer: str) -> None: 

2136 self.create_butlers() 

2137 

2138 storageClassName = "StructuredData" 

2139 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2140 datasetTypeName = "random_data" 

2141 run = "run1" 

2142 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2143 

2144 dimensions = self.source_butler.dimensions.conform(()) 

2145 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2146 self.source_butler.registry.registerDatasetType(datasetType) 

2147 

2148 metrics = makeExampleMetrics() 

2149 with ResourcePath.temporary_uri(suffix=".json") as temp: 

2150 dataId = DataCoordinate.make_empty(self.source_butler.dimensions) 

2151 source_refs = [DatasetRef(datasetType, dataId, run=run)] 

2152 temp.write(json.dumps(metrics.exportAsDict()).encode()) 

2153 dataset = FileDataset(path=temp, refs=source_refs) 

2154 self.source_butler.ingest(dataset, transfer="direct") 

2155 

2156 self.target_butler.transfer_from( 

2157 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer 

2158 ) 

2159 

2160 uri = self.target_butler.getURI(dataset.refs[0]) 

2161 if transfer == "auto": 

2162 self.assertEqual(uri, temp) 

2163 else: 

2164 self.assertNotEqual(uri, temp) 

2165 
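
The call under test, reduced to its essentials (the butler and ref variables are assumed to exist):

    transferred = target_butler.transfer_from(
        source_butler,
        source_refs,
        transfer="auto",               # "copy" is exercised above as well
        register_dataset_types=True,   # create missing dataset types in the target
        transfer_dimensions=True,      # copy the dimension records the refs need
    )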

2166 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None: 

2167 """Test that a run can be transferred to another butler.""" 

2168 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2169 datasetTypeName = "random_data" 

2170 

2171 # Test will create 3 collections and we will want to transfer 

2172 # two of those three. 

2173 runs = ["run1", "run2", "other"] 

2174 

2175 # Also want to use two different dataset types to ensure that 

2176 # grouping works. 

2177 datasetTypeNames = ["random_data", "random_data_2"] 

2178 

2179 # Create the run collections in the source butler. 

2180 for run in runs: 

2181 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2182 

2183 # Create dimensions in source butler. 

2184 n_exposures = 30 

2185 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2186 self.source_butler.registry.insertDimensionData( 

2187 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2188 ) 

2189 self.source_butler.registry.insertDimensionData( 

2190 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2191 ) 

2192 

2193 for i in range(n_exposures): 

2194 self.source_butler.registry.insertDimensionData( 

2195 "exposure", 

2196 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2197 ) 

2198 

2199 # Create dataset types in the source butler. 

2200 dimensions = self.source_butler.dimensions.conform(["instrument", "exposure"]) 

2201 for datasetTypeName in datasetTypeNames: 

2202 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2203 self.source_butler.registry.registerDatasetType(datasetType) 

2204 

2205 # Write a dataset to an unrelated run -- this will ensure that 

2206 # we are rewriting integer dataset ids in the target if necessary. 

2207 # Will not be relevant for UUID. 

2208 run = "distraction" 

2209 butler = Butler.from_config(butler=self.source_butler, run=run) 

2210 butler.put( 

2211 makeExampleMetrics(), 

2212 datasetTypeName, 

2213 exposure=1, 

2214 instrument="DummyCamComp", 

2215 physical_filter="d-r", 

2216 ) 

2217 

2218 # Write some example metrics to the source 

2219 butler = Butler.from_config(butler=self.source_butler) 

2220 

2221 # Set of DatasetRefs that should be in the list of refs to transfer 

2222 # but which will not be transferred. 

2223 deleted: set[DatasetRef] = set() 

2224 

2225 n_expected = 20 # Number of datasets expected to be transferred 

2226 source_refs = [] 

2227 for i in range(n_exposures): 

2228 # Put a third of datasets into each collection, only retain 

2229 # two thirds. 

2230 index = i % 3 

2231 run = runs[index] 

2232 datasetTypeName = datasetTypeNames[i % 2] 

2233 

2234 metric = MetricsExample( 

2235 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)] 

2236 ) 

2237 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2238 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2239 

2240 # Remove the datastore record using low-level API, but only 

2241 # for a specific index. 

2242 if purge and index == 1: 

2243 # For one of these delete the file as well. 

2244 # This allows the "missing" code to filter the 

2245 # file out. 

2246 # Access the individual datastores. 

2247 datastores = [] 

2248 if hasattr(butler._datastore, "datastores"): 

2249 datastores.extend(butler._datastore.datastores) 

2250 else: 

2251 datastores.append(butler._datastore) 

2252 

2253 if not deleted: 

2254 # For a chained datastore we need to remove 

2255 # files in each chain. 

2256 for datastore in datastores: 

2257 # The file might not be known to the datastore 

2258 # if constraints are used. 

2259 try: 

2260 primary, uris = datastore.getURIs(ref) 

2261 except FileNotFoundError: 

2262 continue 

2263 if primary and primary.scheme != "mem": 

2264 primary.remove() 

2265 for uri in uris.values(): 

2266 if uri.scheme != "mem": 

2267 uri.remove() 

2268 n_expected -= 1 

2269 deleted.add(ref) 

2270 

2271 # Remove the datastore record. 

2272 for datastore in datastores: 

2273 if hasattr(datastore, "removeStoredItemInfo"): 

2274 datastore.removeStoredItemInfo(ref) 

2275 

2276 if index < 2: 

2277 source_refs.append(ref) 

2278 if ref not in deleted: 

2279 new_metric = butler.get(ref) 

2280 self.assertEqual(new_metric, metric) 

2281 

2282 # Create some bad dataset types to ensure we check for inconsistent 

2283 # definitions. 

2284 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2285 for datasetTypeName in datasetTypeNames: 

2286 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2287 self.target_butler.registry.registerDatasetType(datasetType) 

2288 with self.assertRaises(ConflictingDefinitionError) as cm: 

2289 self.target_butler.transfer_from(self.source_butler, source_refs) 

2290 self.assertIn("dataset type differs", str(cm.exception)) 

2291 

2292 # And remove the bad definitions. 

2293 for datasetTypeName in datasetTypeNames: 

2294 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2295 

2296 # Transfer without creating dataset types should fail. 

2297 with self.assertRaises(KeyError): 

2298 self.target_butler.transfer_from(self.source_butler, source_refs) 

2299 

2300 # Transfer without creating dimensions should fail. 

2301 with self.assertRaises(ConflictingDefinitionError) as cm: 

2302 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True) 

2303 self.assertIn("dimension", str(cm.exception)) 

2304 

2305 # The failed transfer above leaves registry in an inconsistent 

2306 # state because the run is created but then rolled back without 

2307 # the collection cache being cleared. For now force a refresh. 

2308 # Can remove with DM-35498. 

2309 self.target_butler.registry.refresh() 

2310 

2311 # Now transfer them to the second butler, including dimensions. 

2312 with self.assertLogs(logger="lsst", level=logging.DEBUG) as log_cm: 

2313 transferred = self.target_butler.transfer_from( 

2314 self.source_butler, 

2315 source_refs, 

2316 register_dataset_types=True, 

2317 transfer_dimensions=True, 

2318 ) 

2319 self.assertEqual(len(transferred), n_expected) 

2320 log_output = ";".join(log_cm.output) 

2321 

2322 # A ChainedDatastore will use the in-memory datastore for mexists 

2323        # so we cannot rely on the mexists log message. 

2324 self.assertIn("Number of datastore records found in source", log_output) 

2325 self.assertIn("Creating output run", log_output) 

2326 

2327 # Do the transfer twice to ensure that it will do nothing extra. 

2328 # Only do this if purge=True because it does not work for int 

2329 # dataset_id. 

2330 if purge: 

2331 # This should not need to register dataset types. 

2332 transferred = self.target_butler.transfer_from(self.source_butler, source_refs) 

2333 self.assertEqual(len(transferred), n_expected) 

2334 

2335 # Also do an explicit low-level transfer to trigger some 

2336 # edge cases. 

2337 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2338 self.target_butler._datastore.transfer_from(self.source_butler._datastore, source_refs) 

2339 log_output = ";".join(log_cm.output) 

2340 self.assertIn("no file artifacts exist", log_output) 

2341 

2342 with self.assertRaises((TypeError, AttributeError)): 

2343 self.target_butler._datastore.transfer_from(self.source_butler, source_refs) # type: ignore 

2344 

2345 with self.assertRaises(ValueError): 

2346 self.target_butler._datastore.transfer_from( 

2347 self.source_butler._datastore, source_refs, transfer="split" 

2348 ) 

2349 

2350 # Now try to get the same refs from the new butler. 

2351 for ref in source_refs: 

2352 if ref not in deleted: 

2353 new_metric = self.target_butler.get(ref) 

2354 old_metric = self.source_butler.get(ref) 

2355 self.assertEqual(new_metric, old_metric) 

2356 

2357 # Now prune run2 collection and create instead a CHAINED collection. 

2358 # This should block the transfer. 

2359 self.target_butler.removeRuns(["run2"], unstore=True) 

2360 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2361 with self.assertRaises(CollectionTypeError): 

2362 # Re-importing the run1 datasets can be problematic if they 

2363 # use integer IDs so filter those out. 

2364 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2365 self.target_butler.transfer_from(self.source_butler, to_transfer) 

2366 

2367 

2368class ChainedDatastoreTransfers(PosixDatastoreTransfers): 

2369 """Test transfers using a chained datastore.""" 

2370 

2371 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2372 

2373 

2374class NullDatastoreTestCase(unittest.TestCase): 

2375 """Test that we can fall back to a null datastore.""" 

2376 

2377 # Need a good config to create the repo. 

2378 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2379 storageClassFactory: StorageClassFactory 

2380 

2381 @classmethod 

2382 def setUpClass(cls) -> None: 

2383 cls.storageClassFactory = StorageClassFactory() 

2384 cls.storageClassFactory.addFromConfig(cls.configFile) 

2385 

2386 def setUp(self) -> None: 

2387 """Create a new butler root for each test.""" 

2388 self.root = makeTestTempDir(TESTDIR) 

2389 Butler.makeRepo(self.root, config=Config(self.configFile)) 

2390 

2391 def tearDown(self) -> None: 

2392 removeTestTempDir(self.root) 

2393 

2394 def test_fallback(self) -> None: 

2395 # Read the butler config and mess with the datastore section. 

2396 bad_config = Config(os.path.join(self.root, "butler.yaml")) 

2397 bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore" 

2398 

2399 with self.assertRaises(RuntimeError): 

2400 Butler.from_config(bad_config) 

2401 

2402 butler = Butler.from_config(bad_config, writeable=True, without_datastore=True) 

2403 self.assertIsInstance(butler._datastore, NullDatastore) 

2404 

2405 # Check that registry is working. 

2406 butler.registry.registerRun("MYRUN") 

2407 collections = butler.registry.queryCollections(...) 

2408 self.assertIn("MYRUN", set(collections)) 

2409 

2410 # Create a ref. 

2411 dimensions = butler.dimensions.conform([]) 

2412 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

2413 datasetTypeName = "metric" 

2414 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2415 butler.registry.registerDatasetType(datasetType) 

2416 ref = DatasetRef(datasetType, {}, run="MYRUN") 

2417 

2418 # Check that datastore will complain. 

2419 with self.assertRaises(FileNotFoundError): 

2420 butler.get(ref) 

2421 with self.assertRaises(FileNotFoundError): 

2422 butler.getURI(ref) 

2423 
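
This fallback makes registry-only clients possible; a sketch with an assumed repo path:

    butler = Butler.from_config("/path/to/repo", without_datastore=True)
    print(set(butler.registry.queryCollections(...)))  # registry calls still work
    # Any artifact access, e.g. butler.get(ref), raises FileNotFoundError.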

2424 

2425def setup_module(module: types.ModuleType) -> None: 

2426 """Set up the module for pytest.""" 

2427 clean_environment() 

2428 

2429 

2430if __name__ == "__main__": 

2431 clean_environment() 

2432 unittest.main()