Coverage for tests/test_butler.py: 13%

1337 statements

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

from __future__ import annotations

import gc
import json
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import uuid
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, cast

try:
    import boto3
    import botocore
    from lsst.resources.s3utils import clean_test_environment_for_s3
    from moto import mock_s3  # type: ignore[import]
except ImportError:
    boto3 = None

    def mock_s3(*args: Any, **kwargs: Any) -> Any:  # type: ignore[no-untyped-def]
        """No-op decorator in case moto mock_s3 can not be imported."""
        return None
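
# Note: moto 5 consolidated its per-service decorators into a single
# ``mock_aws``, so the ``mock_s3`` import above assumes moto 4.x or earlier.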


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    ButlerRepoIndex,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetExistence,
    DatasetRef,
    DatasetType,
    FileDataset,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.datastore import NullDatastore
from lsst.daf.butler.datastore.file_templates import FileTemplate, FileTemplateValidationError
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.direct_butler import DirectButler
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.registry.sql_registry import SqlRegistry
from lsst.daf.butler.repo_relocation import BUTLER_ROOT_TAG
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    import types

    from lsst.daf.butler import Datastore, DimensionGroup, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def clean_environment() -> None:
    """Remove external environment variables that affect the tests."""
    for k in ("DAF_BUTLER_REPOSITORY_INDEX",):
        os.environ.pop(k, None)


def makeExampleMetrics() -> MetricsExample:
    """Return an example dataset suitable for tests."""
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in any other test
    cases.
    """

    def testSearchPath(self) -> None:
        """Test that extra search paths can override config defaults."""
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests(TestCaseMixin):
    """Helper methods for running a suite of put/get tests against
    different butler configurations.
    """

    root: str
    default_run = "ingésτ😺"
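    # The run name above is deliberately non-ASCII, exercising Unicode
    # handling in run names.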

    storageClassFactory: StorageClassFactory
    configFile: str
    tmpConfigFile: str

    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGroup, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(
        self,
        butler: Butler,
        datasetRef: DatasetRef,
        components: tuple[str, ...],
        reference: Any,
        collections: Any = None,
    ) -> None:
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[DirectButler, DatasetType]:
        butler = Butler.from_config(self.tmpConfigFile, run=run)
        assert isinstance(butler, DirectButler), "Expect DirectButler in configuration"

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run})

        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> DirectButler:
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
        # Declare the loop variable's union type up front for mypy.
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]
        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                kwargs: dict[str, Any] = {}
                if not isinstance(args[0], DatasetRef):  # type: ignore
                    kwargs["run"] = this_run
                ref = butler.put(metric, *args, **kwargs)
                self.assertIsInstance(ref, DatasetRef)

                # Test get of a ref.
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                primary_uri, secondary_uris = butler.getURIs(ref)
                n_uris = len(secondary_uris)
                if primary_uri:
                    n_uris += 1

                # Can the artifacts themselves be retrieved?
                if not butler._datastore.isEphemeral:
                    # Create a temporary directory to hold the retrieved
                    # artifacts.
                    with tempfile.TemporaryDirectory(
                        prefix="butler-artifacts-", ignore_cleanup_errors=True
                    ) as artifact_root:
                        root_uri = ResourcePath(artifact_root, forceDirectory=True)

                        for preserve_path in (True, False):
                            destination = root_uri.join(f"{preserve_path}_{counter}/")
                            log = logging.getLogger("lsst.x")
                            log.warning("Using destination %s for args %s", destination, args)
                            # Use copy so that we can test that overwrite
                            # protection works (using "auto" for File URIs
                            # would use hard links and subsequent transfer
                            # would work because it knows they are the same
                            # file).
                            transferred = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, transfer="copy"
                            )
                            self.assertGreater(len(transferred), 0)
                            artifacts = list(ResourcePath.findFileResources([destination]))
                            self.assertEqual(set(transferred), set(artifacts))

                            for artifact in transferred:
                                path_in_destination = artifact.relative_to(destination)
                                self.assertIsNotNone(path_in_destination)
                                assert path_in_destination is not None

                                # When path is not preserved there should not
                                # be any path separators.
                                num_seps = path_in_destination.count("/")
                                if preserve_path:
                                    self.assertGreater(num_seps, 0)
                                else:
                                    self.assertEqual(num_seps, 0)

                            self.assertEqual(
                                len(artifacts),
                                n_uris,
                                "Comparing expected artifacts vs actual:"
                                f" {artifacts} vs {primary_uri} and {secondary_uris}",
                            )

                            if preserve_path:
                                # No need to run these twice
                                with self.assertRaises(ValueError):
                                    butler.retrieveArtifacts([ref], destination, transfer="move")

                                with self.assertRaises(FileExistsError):
                                    butler.retrieveArtifacts([ref], destination)

                                transferred_again = butler.retrieveArtifacts(
                                    [ref], destination, preserve_path=preserve_path, overwrite=True
                                )
                                self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                kwargs = {"collections": this_run}
                if isinstance(args[0], DatasetRef):
                    kwargs = {}  # Prevent warning from being issued.
                self.assertFalse(butler.exists(*args, **kwargs))
                # get() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.get_dataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Create DatasetRef for put using default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Check that getDeferred fails with standalone ref.
        with self.assertRaises(LookupError):
            butler.getDeferred(refIn)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        assert metric.data is not None  # for mypy
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.find_dataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises(FileNotFoundError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add the same ref again, so we can check that duplicate put fails.
        ref = butler.put(metric, datasetType, dataId)

        # Repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # In case of resolved ref this write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler
    def testDeferredCollectionPassing(self) -> None:
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler.from_config(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to get the dataset without any collection raises
        # CollectionError.
        self.assertFalse(butler.exists(datasetType, dataId))
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original run collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True
    validationCanFail: bool
    fullConfigKey: str | None
    registryStr: str | None
    datastoreName: list[str] | None
    datastoreStr: list[str]
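    # Concrete subclasses are expected to fill in the attributes above for
    # their particular registry/datastore configuration.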

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self) -> None:
        """Independent test of constructor."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler.from_config(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler.from_config(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler.from_config(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler.from_config(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertEqual(type(butler._datastore), type(butler2._datastore))
        self.assertEqual(butler._datastore.config, butler2._datastore.config)

        # Test that we can use an environment variable to find this
        # repository.
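        # (The repository index is a small YAML or JSON mapping of labels to
        # butler config URIs, e.g. ``label: /path/to/repo/butler.yaml``, as
        # built up below.)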

        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"file://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler.from_config(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler.from_config("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler.from_config("not_there", writeable=False)
                    with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"):
                        Butler.from_config("bad_label")
                    with self.assertRaises(FileNotFoundError):
                        # Should ignore aliases.
                        Butler.from_config(ResourcePath("label", forceAbsolute=False))
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(
                        Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False)
                    )
                    self.assertIn("not known to", str(cm.exception))
                    # Should report no failure.
                    self.assertEqual(ButlerRepoIndex.get_failure_reason(), "")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with empty configuration.
                butler_index = Config()
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"):
                        Butler.from_config("label")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with bad contents.
                with open(temp_file.ospath, "w") as fh:
                    print("'", file=fh)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"):
                        Butler.from_config("label")
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())

            with self.assertRaisesRegex(FileNotFoundError, "index file not found"):
                Butler.from_config("label")

        # Check that we can create Butler when the alias file is not found.
        butler = Butler.from_config(self.tmpConfigFile, writeable=False)
        self.assertIsInstance(butler, Butler)
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"):
            # No aliases registered.
            Butler.from_config("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler._datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler._datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.get_dataset_type(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")
        # Get it again with the wrong dataset type definition, this time
        # passing the DatasetType itself rather than a resolved ref. The
        # behavior should be consistent with the ref-based get() above and
        # return the python type of the given DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self) -> None:
        """Test ingesting existing files into the repository."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            # For first ingest use copy.
            butler.ingest(*datasets, transfer="copy", record_validation_info=False)

            # Now try to ingest again in "execution butler" mode where
            # the registry entries exist but the datastore does not have
            # the files. We also need to strip the dimension records to ensure
            # that they will be re-added by the ingest.
            ref = datasets[0].refs[0]
            datasets[0].refs = [
                cast(
                    DatasetRef,
                    butler.find_dataset(ref.datasetType, data_id=ref.dataId, collections=ref.run),
                )
                for ref in datasets[0].refs
            ]
            all_refs = []
            for dataset in datasets:
                refs = []
                for ref in dataset.refs:
                    # Create a dict from the dataId to drop the records.
                    new_data_id = dict(ref.dataId.required)
                    new_ref = butler.find_dataset(ref.datasetType, new_data_id, collections=ref.run)
                    assert new_ref is not None
                    self.assertFalse(new_ref.dataId.hasRecords())
                    refs.append(new_ref)
                dataset.refs = refs
                all_refs.extend(dataset.refs)
            butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False)

            # Use move mode to test that the file is deleted. Also
            # disable recording of file size.
            butler.ingest(*datasets, transfer="move", record_validation_info=False)

            # Check that every ref now has records.
            for dataset in datasets:
                for ref in dataset.refs:
                    self.assertTrue(ref.dataId.hasRecords())

            # Ensure that the file has disappeared.
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler._datastore.getStoredItemsInfo(datasets[0].refs[0])  # type: ignore[attr-defined]
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.exists(datasetTypeName, dataId1))
        self.assertTrue(butler.exists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets
        datasets = []
        butler.ingest(*datasets)

    def testPickle(self) -> None:
        """Test pickle support."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        assert isinstance(butler, DirectButler), "Expect DirectButler in configuration"
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)
    def testGetDatasetTypes(self) -> None:
        """Test dataset type registration, querying, and validation."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.dimensions.conform(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry entries are not created
        # for components but querying them can return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )
    def testTransaction(self) -> None:
        """Test that a failing transaction rolls back registry and datastore
        changes.
        """
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get of a ref.
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
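        # Everything inserted or stored inside the failed transaction should
        # now have been rolled back.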

        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.find_dataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)

    def testMakeRepo(self) -> None:
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler.from_config(butlerConfig)
        assert isinstance(butler1, DirectButler), "Expect DirectButler in configuration"
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler.from_config(butlerConfig)
        assert isinstance(butler2, DirectButler), "Expect DirectButler in configuration"
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler.from_config(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self) -> None:
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler._datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self) -> None:
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.dimensions.conform(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)
    def testGetDatasetCollectionCaching(self) -> None:
        # Prior to DM-41117, there was a bug where get_dataset would throw
        # MissingCollectionError if you tried to fetch a dataset that was
        # added after the collection cache was last updated.
        reader_butler, datasetType = self.create_butler(self.default_run, "int", "datasettypename")
        writer_butler = Butler.from_config(self.tmpConfigFile, writeable=True, run="new_run")
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        put_ref = writer_butler.put(123, datasetType, dataId)
        get_ref = reader_butler.get_dataset(put_ref.id)
        self.assertEqual(get_ref.id, put_ref.id)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool:
        """Check if a file exists at the given path relative to ``root``.

        The testPutTemplates test verifies the actual physical existence of
        the files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self) -> None:
        """Test that datasets land at the paths their file templates define."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

1325 

1326 template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits") 

1327 with self.assertRaises(KeyError): 

1328 with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"): 

1329 template.format(ref) 

1330 

1331 # Now use a file template that will not result in unique filenames 

1332 with self.assertRaises(FileTemplateValidationError): 

1333 butler.put(metric, "metric3", dataId1) 

1334 

1335 def testImportExport(self) -> None: 

1336 # Run put/get tests just to create and populate a repo. 

1337 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1338 self.runImportExportTest(storageClass) 

1339 

1340 @unittest.expectedFailure 

1341 def testImportExportVirtualComposite(self) -> None: 

1342 # Run put/get tests just to create and populate a repo. 

1343 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

1344 self.runImportExportTest(storageClass) 

1345 

1346 def runImportExportTest(self, storageClass: StorageClass) -> None: 

1347 """Test exporting and importing. 

1348 

1349 This test does an export to a temp directory and an import back 

1350 into a new temp directory repo. It does not assume a posix datastore. 

1351 """ 

1352 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1353 

1354 # Test that we must have a file extension. 

1355 with self.assertRaises(ValueError): 

1356 with exportButler.export(filename="dump", directory=".") as export: 

1357 pass 

1358 

1359 # Test that unknown format is not allowed. 

1360 with self.assertRaises(ValueError): 

1361 with exportButler.export(filename="dump.fits", directory=".") as export: 

1362 pass 

1363 

1364 # Test that the repo actually has at least one dataset. 

1365 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1366 self.assertGreater(len(datasets), 0) 

1367 # Add a DimensionRecord that's unused by those datasets. 

1368 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")} 

1369 exportButler.registry.insertDimensionData("skymap", skymapRecord) 

1370 # Export and then import datasets. 

1371 with safeTestTempDir(TESTDIR) as exportDir: 

1372 exportFile = os.path.join(exportDir, "exports.yaml") 

1373 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export: 

1374 export.saveDatasets(datasets) 

1375 # Export the same datasets again. This should quietly do 

1376 # nothing because of internal deduplication, and it shouldn't 

1377 # complain about being asked to export the "htm7" elements even 

1378 # though there aren't any in these datasets or in the database. 

1379 export.saveDatasets(datasets, elements=["htm7"]) 

1380 # Save one of the data IDs again; this should be harmless 

1381 # because of internal deduplication. 

1382 export.saveDataIds([datasets[0].dataId]) 

1383 # Save some dimension records directly. 

1384 export.saveDimensionData("skymap", [skymapRecord]) 

1385 self.assertTrue(os.path.exists(exportFile)) 

1386 with safeTestTempDir(TESTDIR) as importDir: 

1387 # We always want this to be a local POSIX butler. 

1388 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml"))) 

1389 # Calling script.butlerImport tests the implementation of the 

1390 # butler command line interface "import" subcommand. Functions 

1391 # in the script folder are generally considered protected and 

1392 # should not be used as public api. 

1393 with open(exportFile) as f: 

1394 script.butlerImport( 

1395 importDir, 

1396 export_file=f, 

1397 directory=exportDir, 

1398 transfer="auto", 

1399 skip_dimensions=None, 

1400 ) 

1401 importButler = Butler.from_config(importDir, run=self.default_run) 

1402 for ref in datasets: 

1403 with self.subTest(ref=ref): 

1404 # Test for existence by passing in the DatasetType and 

1405 # data ID separately, to avoid lookup by dataset_id. 

1406 self.assertTrue(importButler.exists(ref.datasetType, ref.dataId)) 

1407 self.assertEqual( 

1408 list(importButler.registry.queryDimensionRecords("skymap")), 

1409 [importButler.dimensions["skymap"].RecordClass(**skymapRecord)], 

1410 ) 

1411 

1412 def testRemoveRuns(self) -> None: 

1413 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1414 butler = Butler.from_config(self.tmpConfigFile, writeable=True) 

1415 # Load registry data with dimensions to hang datasets off of. 

1416 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1417 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1418 # Add some RUN-type collections. 

1419 run1 = "run1" 

1420 butler.registry.registerRun(run1) 

1421 run2 = "run2" 

1422 butler.registry.registerRun(run2) 

1423 # Put a dataset in each run. 

1424 metric = makeExampleMetrics() 

1425 dimensions = butler.dimensions.conform(["instrument", "physical_filter"]) 

1426 datasetType = self.addDatasetType( 

1427 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1428 ) 

1429 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1430 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1431 uri1 = butler.getURI(ref1) 

1432 uri2 = butler.getURI(ref2) 

1433 

1434 with self.assertRaises(OrphanedRecordError): 

1435 butler.registry.removeDatasetType(datasetType.name) 

1436 

1437 # Remove from both runs with different values for unstore. 

1438 butler.removeRuns([run1], unstore=True) 

1439 butler.removeRuns([run2], unstore=False) 

1440 # Should be nothing in registry for either one, and datastore should 

1441 # not think either exists. 

1442 with self.assertRaises(MissingCollectionError): 

1443 butler.registry.getCollectionType(run1) 

1444 with self.assertRaises(MissingCollectionError): 

1445 butler.registry.getCollectionType(run2) 

1446 self.assertFalse(butler.stored(ref1)) 

1447 self.assertFalse(butler.stored(ref2)) 

1448 # The ref we unstored should be gone according to the URI, but the 

1449 # one we forgot should still be around. 

1450 self.assertFalse(uri1.exists()) 

1451 self.assertTrue(uri2.exists()) 

1452 

1453 # Now that the collections have been pruned we can remove the 

1454 # dataset type 

1455 butler.registry.removeDatasetType(datasetType.name) 

1456 
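# Wildcard dataset type names that match nothing should only be logged,

# not raised as an error.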

1457 with self.assertLogs("lsst.daf.butler.registry", "INFO") as cm: 

1458 butler.registry.removeDatasetType(("test*", "test*")) 

1459 self.assertIn("not defined", "\n".join(cm.output)) 

1460 

1461 

1462class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1463 """PosixDatastore specialization of a butler""" 

1464 

1465 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1466 fullConfigKey: str | None = ".datastore.formatters" 

1467 validationCanFail = True 

1468 datastoreStr = ["/tmp"] 

1469 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1470 registryStr = "/gen3.sqlite3" 

1471 

1472 def testPathConstructor(self) -> None: 

1473 """Independent test of constructor using PathLike.""" 

1474 butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) 

1475 self.assertIsInstance(butler, Butler) 

1476 

1477 # And again with a Path object with the butler yaml 

1478 path = pathlib.Path(self.tmpConfigFile) 

1479 butler = Butler.from_config(path, writeable=False) 

1480 self.assertIsInstance(butler, Butler) 

1481 

1482 # And again with a Path object without the butler yaml 

1483 # (making sure we skip it if the tmp config doesn't end 

1484 # in butler.yaml -- which is the case for a subclass) 

1485 if self.tmpConfigFile.endswith("butler.yaml"): 

1486 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1487 butler = Butler.from_config(path, writeable=False) 

1488 self.assertIsInstance(butler, Butler) 

1489 

1490 def testExportTransferCopy(self) -> None: 

1491 """Test local export using all transfer modes""" 

1492 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1493 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1494 # Test that the repo actually has at least one dataset. 

1495 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1496 self.assertGreater(len(datasets), 0) 

1497 uris = [exportButler.getURI(d) for d in datasets] 

1498 assert isinstance(exportButler._datastore, FileDatastore) 

1499 datastoreRoot = exportButler.get_datastore_roots()[exportButler.get_datastore_names()[0]] 

1500 

1501 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 
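# ResourcePath.relative_to() returns None for URIs outside the given

# root, hence the per-path assertions below.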

1502 

1503 for path in pathsInStore: 

1504 # Assume local file system 

1505 assert path is not None 

1506 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1507 

1508 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1509 with safeTestTempDir(TESTDIR) as exportDir: 

1510 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1511 export.saveDatasets(datasets) 

1512 for path in pathsInStore: 

1513 assert path is not None 

1514 self.assertTrue( 

1515 self.checkFileExists(exportDir, path), 

1516 f"Check that mode {transfer} exported files", 

1517 ) 

1518 

1519 def testPruneDatasets(self) -> None: 

1520 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1521 butler = Butler.from_config(self.tmpConfigFile, writeable=True) 

1522 assert isinstance(butler._datastore, FileDatastore) 

1523 # Load registry data with dimensions to hang datasets off of. 

1524 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1525 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1526 # Add some RUN-type collections. 

1527 run1 = "run1" 

1528 butler.registry.registerRun(run1) 

1529 run2 = "run2" 

1530 butler.registry.registerRun(run2) 

1531 # Put some datasets. ref1 and ref2 have the same data ID, and are in 

1532 # different runs. ref3 has a different data ID. 

1533 metric = makeExampleMetrics() 

1534 dimensions = butler.dimensions.conform(["instrument", "physical_filter"]) 

1535 datasetType = self.addDatasetType( 

1536 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1537 ) 

1538 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1539 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1540 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1541 

1542 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1543 for ref, stored in many_stored.items(): 

1544 self.assertTrue(stored, f"Ref {ref} should be stored") 

1545 

1546 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1547 for ref, exists in many_exists.items(): 

1548 self.assertTrue(exists, f"Checking ref {ref} exists.") 

1549 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored") 

1550 

1551 # Simple prune. 

1552 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1553 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1)) 

1554 

1555 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1556 for ref, stored in many_stored.items(): 

1557 self.assertFalse(stored, f"Ref {ref} should not be stored") 

1558 

1559 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1560 for ref, exists in many_exists.items(): 

1561 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored") 

1562 

1563 # Put data back. 

1564 ref1_new = butler.put(metric, ref1) 

1565 self.assertEqual(ref1_new, ref1) # Reuses original ID. 

1566 ref2 = butler.put(metric, ref2) 

1567 

1568 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1569 self.assertTrue(many_stored[ref1]) 

1570 self.assertTrue(many_stored[ref2]) 

1571 self.assertFalse(many_stored[ref3]) 

1572 

1573 ref3 = butler.put(metric, ref3) 

1574 

1575 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1576 for ref, exists in many_exists.items(): 

1577 self.assertTrue(exists, f"Ref {ref} should be stored") 

1578 

1579 # Clear out the datasets from registry and start again. 

1580 refs = [ref1, ref2, ref3] 

1581 butler.pruneDatasets(refs, purge=True, unstore=True) 

1582 for ref in refs: 

1583 butler.put(metric, ref) 

1584 

1585 # Confirm we can retrieve deferred. 

1586 dref1 = butler.getDeferred(ref1) # known and exists 

1587 metric1 = dref1.get() 

1588 self.assertEqual(metric1, metric) 

1589 

1590 # Test different forms of file availability. 

1591 # Need to be in a state where: 

1592 # - one ref just has registry record. 

1593 # - one ref has a missing file but a datastore record. 

1594 # - one ref has a missing datastore record but file is there. 

1595 # - one ref does not exist anywhere. 

1596 # Do not need to test a ref that has everything since that is tested 

1597 # above. 
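# ref0 is constructed directly with no id, so it gets a freshly

# generated UUID that neither registry nor datastore should recognize.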

1598 ref0 = DatasetRef( 

1599 datasetType, 

1600 DataCoordinate.standardize( 

1601 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions 

1602 ), 

1603 run=run1, 

1604 ) 

1605 

1606 # Delete from datastore and retain in Registry. 

1607 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False) 

1608 

1609 # File has been removed. 

1610 uri2 = butler.getURI(ref2) 

1611 uri2.remove() 

1612 

1613 # Datastore has lost track. 

1614 butler._datastore.forget([ref3]) 

1615 

1616 # First test with a standard butler. 
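# full_check=True verifies the artifact behind each datastore record;

# full_check=False trusts the records and flags such results _ASSUMED.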

1617 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1618 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1619 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1620 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1621 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED) 

1622 

1623 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False) 

1624 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1625 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1626 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN) 

1627 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 
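# KNOWN is the combination RECORDED | DATASTORE | _ASSUMED; a

# DatasetExistence value is truthy when the dataset is thought to exist.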

1628 self.assertTrue(exists_many[ref2]) 

1629 

1630 # Check that per-ref query gives the same answer as many query. 

1631 for ref, exists in exists_many.items(): 

1632 self.assertEqual(butler.exists(ref, full_check=False), exists) 

1633 

1634 # Get deferred checks for existence before it allows it to be 

1635 # retrieved. 

1636 with self.assertRaises(LookupError): 

1637 butler.getDeferred(ref3) # not known, file exists 

1638 dref2 = butler.getDeferred(ref2) # known but file missing 

1639 with self.assertRaises(FileNotFoundError): 

1640 dref2.get() 

1641 

1642 # Test again with a trusting butler. 
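# With trustGetRequest enabled the datastore also looks for the artifact

# itself, so ref3 (file present, record forgotten) gains _ARTIFACT.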

1643 butler._datastore.trustGetRequest = True 

1644 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1645 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1646 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1647 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1648 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT) 

1649 

1650 # When trusting we can get a deferred dataset handle that is not 

1651 # known but does exist. 

1652 dref3 = butler.getDeferred(ref3) 

1653 metric3 = dref3.get() 

1654 self.assertEqual(metric3, metric) 

1655 

1656 # Check that per-ref query gives the same answer as many query. 

1657 for ref, exists in exists_many.items(): 

1658 self.assertEqual(butler.exists(ref, full_check=True), exists) 

1659 

1660 # Create a ref that surprisingly has the UUID of an existing ref 

1661 # but is not the same. 

1662 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id) 

1663 with self.assertRaises(ValueError): 

1664 butler.exists(ref_bad) 

1665 

1666 # Create a ref that has a compatible storage class. 

1667 ref_compat = ref2.overrideStorageClass("StructuredDataDict") 

1668 exists = butler.exists(ref_compat) 

1669 self.assertEqual(exists, exists_many[ref2]) 

1670 

1671 # Remove everything and start from scratch. 

1672 butler._datastore.trustGetRequest = False 

1673 butler.pruneDatasets(refs, purge=True, unstore=True) 

1674 for ref in refs: 

1675 butler.put(metric, ref) 

1676 

1677 # These tests mess directly with the trash table and can leave the 

1678 # datastore in an odd state. Do them at the end. 

1679 # Check that in normal mode, deleting the record will lead to 

1680 # trash not touching the file. 

1681 uri1 = butler.getURI(ref1) 

1682 butler._datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1683 butler._datastore.forget([ref1]) 

1684 butler._datastore.trash(ref1) 

1685 butler._datastore.emptyTrash() 

1686 self.assertTrue(uri1.exists()) 

1687 uri1.remove() # Clean it up. 

1688 

1689 # Simulate execution butler setup by deleting the datastore 

1690 # record but keeping the file around and trusting. 

1691 butler._datastore.trustGetRequest = True 

1692 uris = butler.get_many_uris([ref2, ref3]) 

1693 uri2 = uris[ref2].primaryURI 

1694 uri3 = uris[ref3].primaryURI 

1695 self.assertTrue(uri2.exists()) 

1696 self.assertTrue(uri3.exists()) 

1697 

1698 # Remove the datastore record. 

1699 butler._datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1700 butler._datastore.forget([ref2]) 

1701 self.assertTrue(uri2.exists()) 

1702 butler._datastore.trash([ref2, ref3]) 

1703 # Immediate removal of the ref2 file (it no longer has a datastore record). 

1704 self.assertFalse(uri2.exists()) 

1705 # But ref3 has to wait for the empty. 

1706 self.assertTrue(uri3.exists()) 

1707 butler._datastore.emptyTrash() 

1708 self.assertFalse(uri3.exists()) 

1709 

1710 # Clear out the datasets from registry. 

1711 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1712 

1713 def testPytypeCoercion(self) -> None: 

1714 """Test python type coercion on Butler.get and put.""" 

1715 # Store some data with the normal example storage class. 

1716 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1717 datasetTypeName = "test_metric" 

1718 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1719 

1720 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1721 metric = butler.get(datasetTypeName, dataId=dataId) 

1722 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1723 

1724 datasetType_ori = butler.get_dataset_type(datasetTypeName) 

1725 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1726 

1727 # Now need to hack the registry dataset type definition. 

1728 # There is no API for this. 

1729 assert isinstance(butler._registry, SqlRegistry) 

1730 manager = butler._registry._managers.datasets 

1731 assert hasattr(manager, "_db") and hasattr(manager, "_static") 
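# Database.update() matches rows where the "name" column equals the

# value stored under the datasetTypeName key of each row dict; roughly:

# UPDATE dataset_type SET storage_class = '...' WHERE name = '<name>'.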

1732 manager._db.update( 

1733 manager._static.dataset_type, 

1734 {"name": datasetTypeName}, 

1735 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1736 ) 

1737 

1738 # Force reset of dataset type cache 

1739 butler.registry.refresh() 

1740 

1741 datasetType_new = butler.get_dataset_type(datasetTypeName) 

1742 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1743 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1744 

1745 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1746 self.assertNotEqual(type(metric_model), type(metric)) 

1747 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1748 

1749 # Put the model and read it back to show that everything now 

1750 # works as normal. 

1751 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1752 metric_model_new = butler.get(metric_ref) 

1753 self.assertEqual(metric_model_new, metric_model) 

1754 

1755 # Hack the storage class again to something that will fail on the 

1756 # get with no conversion class. 

1757 manager._db.update( 

1758 manager._static.dataset_type, 

1759 {"name": datasetTypeName}, 

1760 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1761 ) 

1762 butler.registry.refresh() 

1763 

1764 with self.assertRaises(ValueError): 

1765 butler.get(datasetTypeName, dataId=dataId) 

1766 

1767 

1768@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1769class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1770 """PosixDatastore specialization of a butler using Postgres""" 

1771 

1772 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1773 fullConfigKey = ".datastore.formatters" 

1774 validationCanFail = True 

1775 datastoreStr = ["/tmp"] 

1776 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1777 registryStr = "PostgreSQL@test" 

1778 postgresql: Any 

1779 

1780 @staticmethod 

1781 def _handler(postgresql: Any) -> None: 
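# The registry schema uses exclusion constraints (e.g. on timespans)

# that require the btree_gist extension.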

1782 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1783 with engine.begin() as connection: 

1784 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1785 

1786 @classmethod 

1787 def setUpClass(cls) -> None: 

1788 # Create the postgres test server. 

1789 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1790 cache_initialized_db=True, on_initialized=cls._handler 

1791 ) 

1792 super().setUpClass() 

1793 

1794 @classmethod 

1795 def tearDownClass(cls) -> None: 

1796 # Clean up any lingering SQLAlchemy engines/connections 

1797 # so they're closed before we shut down the server. 

1798 gc.collect() 

1799 cls.postgresql.clear_cache() 

1800 super().tearDownClass() 

1801 

1802 def setUp(self) -> None: 

1803 self.server = self.postgresql() 

1804 

1805 # Need to add a registry section to the config. 

1806 self._temp_config = False 

1807 config = Config(self.configFile) 

1808 config["registry", "db"] = self.server.url() 

1809 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1810 config.dump(fh) 

1811 self.configFile = fh.name 

1812 self._temp_config = True 

1813 super().setUp() 

1814 

1815 def tearDown(self) -> None: 

1816 self.server.stop() 

1817 if self._temp_config and os.path.exists(self.configFile): 

1818 os.remove(self.configFile) 

1819 super().tearDown() 

1820 

1821 def testMakeRepo(self) -> None: 

1822 # The base class test assumes a SQLite registry and a config file 

1823 # acceptable to SQLite. 

1824 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1825 

1826 

1827@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1828class ClonedPostgresPosixDatastoreButlerTestCase(PostgresPosixDatastoreButlerTestCase, unittest.TestCase): 

1829 """Test that Butler with a Postgres registry still works after cloning.""" 

1830 

1831 def create_butler( 

1832 self, run: str, storageClass: StorageClass | str, datasetTypeName: str 

1833 ) -> tuple[DirectButler, DatasetType]: 

1834 butler, datasetType = super().create_butler(run, storageClass, datasetTypeName) 

1835 return butler._clone(run=run), datasetType 

1836 

1837 

1838class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1839 """InMemoryDatastore specialization of a butler""" 

1840 

1841 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1842 fullConfigKey = None 

1843 useTempRoot = False 

1844 validationCanFail = False 

1845 datastoreStr = ["datastore='InMemory"] 

1846 datastoreName = ["InMemoryDatastore@"] 

1847 registryStr = "/gen3.sqlite3" 

1848 

1849 def testIngest(self) -> None: 

1850 pass 

1851 

1852 

1853class ClonedSqliteButlerTestCase(InMemoryDatastoreButlerTestCase, unittest.TestCase): 

1854 """Test that a Butler with a Sqlite registry still works after cloning.""" 

1855 

1856 def create_butler( 

1857 self, run: str, storageClass: StorageClass | str, datasetTypeName: str 

1858 ) -> tuple[DirectButler, DatasetType]: 

1859 butler, datasetType = super().create_butler(run, storageClass, datasetTypeName) 

1860 return butler._clone(run=run), datasetType 

1861 

1862 

1863class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1864 """PosixDatastore specialization""" 

1865 

1866 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1867 fullConfigKey = ".datastore.datastores.1.formatters" 

1868 validationCanFail = True 

1869 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1870 datastoreName = [ 

1871 "InMemoryDatastore@", 

1872 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1873 "SecondDatastore", 

1874 ] 

1875 registryStr = "/gen3.sqlite3" 

1876 

1877 

1878class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1879 """Test that a yaml file in one location can refer to a root in another.""" 

1880 

1881 datastoreStr = ["dir1"] 

1882 # Disable the makeRepo test since we are deliberately not using 

1883 # butler.yaml as the config name. 

1884 fullConfigKey = None 

1885 

1886 def setUp(self) -> None: 

1887 self.root = makeTestTempDir(TESTDIR) 

1888 

1889 # Make a new repository in one place 

1890 self.dir1 = os.path.join(self.root, "dir1") 

1891 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1892 

1893 # Move the yaml file to a different place and add a "root" 

1894 self.dir2 = os.path.join(self.root, "dir2") 

1895 os.makedirs(self.dir2, exist_ok=True) 

1896 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1897 config = Config(configFile1) 

1898 config["root"] = self.dir1 

1899 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1900 config.dumpToUri(configFile2) 

1901 os.remove(configFile1) 

1902 self.tmpConfigFile = configFile2 

1903 

1904 def testFileLocations(self) -> None: 

1905 self.assertNotEqual(self.dir1, self.dir2) 

1906 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1907 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1908 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1909 

1910 

1911class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1912 """Test that a config file created by makeRepo outside of repo works.""" 

1913 

1914 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1915 

1916 def setUp(self) -> None: 

1917 self.root = makeTestTempDir(TESTDIR) 

1918 self.root2 = makeTestTempDir(TESTDIR) 

1919 

1920 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1921 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1922 

1923 def tearDown(self) -> None: 

1924 if os.path.exists(self.root2): 

1925 shutil.rmtree(self.root2, ignore_errors=True) 

1926 super().tearDown() 

1927 

1928 def testConfigExistence(self) -> None: 

1929 c = Config(self.tmpConfigFile) 

1930 uri_config = ResourcePath(c["root"]) 

1931 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1932 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1933 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1934 

1935 def testPutGet(self) -> None: 

1936 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1937 self.runPutGetTest(storageClass, "test_metric") 

1938 

1939 

1940class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1941 """Test that a config file created by makeRepo outside of repo works.""" 

1942 

1943 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1944 

1945 def setUp(self) -> None: 

1946 self.root = makeTestTempDir(TESTDIR) 

1947 self.root2 = makeTestTempDir(TESTDIR) 

1948 

1949 self.tmpConfigFile = self.root2 

1950 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1951 

1952 def testConfigExistence(self) -> None: 

1953 # Append the yaml file else Config constructor does not know the file 

1954 # type. 

1955 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1956 super().testConfigExistence() 

1957 

1958 

1959class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1960 """Test that a config file created by makeRepo outside of repo works.""" 

1961 

1962 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1963 

1964 def setUp(self) -> None: 

1965 self.root = makeTestTempDir(TESTDIR) 

1966 self.root2 = makeTestTempDir(TESTDIR) 

1967 

1968 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1969 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1970 

1971 

1972@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1973class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1974 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1975 a local SQLite-backed SqlRegistry. 

1976 """ 

1977 

1978 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1979 fullConfigKey = None 

1980 validationCanFail = True 

1981 

1982 bucketName = "anybucketname" 

1983 """Name of the Bucket that will be used in the tests. The name is read from 

1984 the config file used with the tests during set-up. 

1985 """ 

1986 

1987 root = "butlerRoot/" 

1988 """Root repository directory expected to be used in case useTempRoot=False. 

1989 Otherwise the root is set to a randomly generated 20-character string 

1990 during set-up. 

1991 """ 

1992 

1993 datastoreStr = [f"datastore={root}"] 

1994 """Contains all expected root locations in a format expected to be 

1995 returned by Butler stringification. 

1996 """ 

1997 

1998 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1999 """The expected format of the S3 Datastore string.""" 

2000 

2001 registryStr = "/gen3.sqlite3" 

2002 """Expected format of the Registry string.""" 

2003 

2004 mock_s3 = mock_s3() 

2005 """The mocked s3 interface from moto.""" 

2006 

2007 def genRoot(self) -> str: 

2008 """Return a random string of len 20 to serve as a root 

2009 name for the temporary bucket repo. 

2010 

2011 This is the S3 equivalent of tempfile.mkdtemp, since this is what self.root 

2012 becomes when useTempRoot is True. 

2013 """ 

2014 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

2015 return rndstr + "/" 

2016 

2017 def setUp(self) -> None: 

2018 config = Config(self.configFile) 

2019 uri = ResourcePath(config[".datastore.datastore.root"]) 

2020 self.bucketName = uri.netloc 

2021 

2022 # Enable S3 mocking of tests. 

2023 self.enterContext(clean_test_environment_for_s3()) 

2024 self.mock_s3.start() 

2025 

2026 if self.useTempRoot: 

2027 self.root = self.genRoot() 

2028 rooturi = f"s3://{self.bucketName}/{self.root}" 

2029 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

2030 

2031 # need local folder to store registry database 

2032 self.reg_dir = makeTestTempDir(TESTDIR) 

2033 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

2034 

2035 # MOTO needs to know that we expect Bucket bucketname to exist 

2036 # (this used to be the class attribute bucketName) 

2037 s3 = boto3.resource("s3") 

2038 s3.create_bucket(Bucket=self.bucketName) 

2039 

2040 self.datastoreStr = [f"datastore='{rooturi}'"] 

2041 self.datastoreName = [f"FileDatastore@{rooturi}"] 

2042 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

2043 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

2044 

2045 def tearDown(self) -> None: 

2046 s3 = boto3.resource("s3") 

2047 bucket = s3.Bucket(self.bucketName) 

2048 try: 

2049 bucket.objects.all().delete() 

2050 except botocore.exceptions.ClientError as e: 

2051 if e.response["Error"]["Code"] == "404": 

2052 # the key was not reachable - pass 

2053 pass 

2054 else: 

2055 raise 

2056 

2057 bucket = s3.Bucket(self.bucketName) 

2058 bucket.delete() 

2059 

2060 # Stop the S3 mock. 

2061 self.mock_s3.stop() 

2062 

2063 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

2064 shutil.rmtree(self.reg_dir, ignore_errors=True) 

2065 

2066 if self.useTempRoot and os.path.exists(self.root): 

2067 shutil.rmtree(self.root, ignore_errors=True) 

2068 

2069 super().tearDown() 

2070 

2071 

2072class PosixDatastoreTransfers(unittest.TestCase): 

2073 """Test data transfers between butlers. 

2074 

2075 Historically this covered different dataset ID managers: UUID to UUID 

2076 and integer to integer. Integer dataset IDs have since been removed, 

2077 so only the UUID-to-UUID case is exercised here. (Integer to UUID was 

2078 supported with the caveat that a UUID4 would be generated, which is 

2079 incorrect for raw dataset types.) 

2080 """ 

2081 

2082 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2083 storageClassFactory: StorageClassFactory 

2084 

2085 @classmethod 

2086 def setUpClass(cls) -> None: 

2087 cls.storageClassFactory = StorageClassFactory() 

2088 cls.storageClassFactory.addFromConfig(cls.configFile) 

2089 

2090 def setUp(self) -> None: 

2091 self.root = makeTestTempDir(TESTDIR) 

2092 self.config = Config(self.configFile) 

2093 

2094 def tearDown(self) -> None: 

2095 removeTestTempDir(self.root) 

2096 

2097 def create_butler(self, manager: str, label: str) -> Butler: 

2098 config = Config(self.configFile) 

2099 config["registry", "managers", "datasets"] = manager 
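# Butler.makeRepo() returns the repository configuration, which can be

# passed straight to Butler.from_config().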

2100 return Butler.from_config( 

2101 Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True 

2102 ) 

2103 

2104 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None: 

2105 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID" 

2106 if manager1 is None: 

2107 manager1 = default 

2108 if manager2 is None: 

2109 manager2 = default 

2110 self.source_butler = self.create_butler(manager1, "1") 

2111 self.target_butler = self.create_butler(manager2, "2") 

2112 

2113 def testTransferUuidToUuid(self) -> None: 

2114 self.create_butlers() 

2115 self.assertButlerTransfers() 

2116 

2117 def _enable_trust(self, datastore: Datastore) -> None: 
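# A ChainedDatastore exposes its children via .datastores; otherwise

# treat the datastore itself as the only member.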

2118 datastores = getattr(datastore, "datastores", [datastore]) 

2119 for this_datastore in datastores: 

2120 if hasattr(this_datastore, "trustGetRequest"): 

2121 this_datastore.trustGetRequest = True 

2122 

2123 def testTransferMissing(self) -> None: 

2124 """Test transfers where datastore records are missing. 

2125 

2126 This is how execution butler works. 

2127 """ 

2128 self.create_butlers() 

2129 

2130 # Configure the source butler to allow trust. 

2131 self._enable_trust(self.source_butler._datastore) 

2132 

2133 self.assertButlerTransfers(purge=True) 

2134 

2135 def testTransferMissingDisassembly(self) -> None: 

2136 """Test transfers where datastore records are missing. 

2137 

2138 This is how execution butler works. 

2139 """ 

2140 self.create_butlers() 

2141 

2142 # Configure the source butler to allow trust. 

2143 self._enable_trust(self.source_butler._datastore) 

2144 

2145 # Test disassembly. 

2146 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2147 

2148 def testAbsoluteURITransferDirect(self) -> None: 

2149 """Test transfer using an absolute URI.""" 

2150 self._absolute_transfer("auto") 

2151 

2152 def testAbsoluteURITransferCopy(self) -> None: 

2153 """Test transfer using an absolute URI.""" 

2154 self._absolute_transfer("copy") 

2155 

2156 def _absolute_transfer(self, transfer: str) -> None: 

2157 self.create_butlers() 

2158 

2159 storageClassName = "StructuredData" 

2160 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2161 datasetTypeName = "random_data" 

2162 run = "run1" 

2163 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2164 

2165 dimensions = self.source_butler.dimensions.conform(()) 

2166 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2167 self.source_butler.registry.registerDatasetType(datasetType) 

2168 

2169 metrics = makeExampleMetrics() 

2170 with ResourcePath.temporary_uri(suffix=".json") as temp: 

2171 dataId = DataCoordinate.make_empty(self.source_butler.dimensions) 

2172 source_refs = [DatasetRef(datasetType, dataId, run=run)] 

2173 temp.write(json.dumps(metrics.exportAsDict()).encode()) 

2174 dataset = FileDataset(path=temp, refs=source_refs) 

2175 self.source_butler.ingest(dataset, transfer="direct") 

2176 

2177 self.target_butler.transfer_from( 

2178 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer 

2179 ) 

2180 

2181 uri = self.target_butler.getURI(dataset.refs[0]) 
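# "auto" should reuse the absolute URI ingested with transfer="direct"

# as-is; "copy" should relocate the artifact into the target datastore.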

2182 if transfer == "auto": 

2183 self.assertEqual(uri, temp) 

2184 else: 

2185 self.assertNotEqual(uri, temp) 

2186 

2187 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None: 

2188 """Test that a run can be transferred to another butler.""" 

2189 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2190 datasetTypeName = "random_data" 

2191 

2192 # Test will create 3 collections and we will want to transfer 

2193 # two of those three. 

2194 runs = ["run1", "run2", "other"] 

2195 

2196 # Also want to use two different dataset types to ensure that 

2197 # grouping works. 

2198 datasetTypeNames = ["random_data", "random_data_2"] 

2199 

2200 # Create the run collections in the source butler. 

2201 for run in runs: 

2202 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2203 

2204 # Create dimensions in source butler. 

2205 n_exposures = 30 

2206 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2207 self.source_butler.registry.insertDimensionData( 

2208 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2209 ) 

2210 self.source_butler.registry.insertDimensionData( 

2211 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2212 ) 

2213 

2214 for i in range(n_exposures): 

2215 self.source_butler.registry.insertDimensionData( 

2216 "exposure", 

2217 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2218 ) 

2219 

2220 # Create dataset types in the source butler. 

2221 dimensions = self.source_butler.dimensions.conform(["instrument", "exposure"]) 

2222 for datasetTypeName in datasetTypeNames: 

2223 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2224 self.source_butler.registry.registerDatasetType(datasetType) 

2225 

2226 # Write a dataset to an unrelated run -- this will ensure that 

2227 # we are rewriting integer dataset ids in the target if necessary. 

2228 # Will not be relevant for UUID. 

2229 run = "distraction" 

2230 butler = Butler.from_config(butler=self.source_butler, run=run) 

2231 butler.put( 

2232 makeExampleMetrics(), 

2233 datasetTypeName, 

2234 exposure=1, 

2235 instrument="DummyCamComp", 

2236 physical_filter="d-r", 

2237 ) 

2238 

2239 # Write some example metrics to the source 

2240 butler = Butler.from_config(butler=self.source_butler) 

2241 

2242 # Set of DatasetRefs that should be in the list of refs to transfer 

2243 # but which will not be transferred. 

2244 deleted: set[DatasetRef] = set() 

2245 

2246 n_expected = 20 # Number of datasets expected to be transferred 

2247 source_refs = [] 

2248 for i in range(n_exposures): 

2249 # Put a third of datasets into each collection, only retain 

2250 # two thirds. 

2251 index = i % 3 

2252 run = runs[index] 

2253 datasetTypeName = datasetTypeNames[i % 2] 

2254 

2255 metric = MetricsExample( 

2256 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)] 

2257 ) 

2258 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2259 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2260 

2261 # Remove the datastore record using low-level API, but only 

2262 # for a specific index. 

2263 if purge and index == 1: 

2264 # For one of these delete the file as well. 

2265 # This allows the "missing" code to filter the 

2266 # file out. 

2267 # Access the individual datastores. 

2268 datastores = [] 

2269 if hasattr(butler._datastore, "datastores"): 

2270 datastores.extend(butler._datastore.datastores) 

2271 else: 

2272 datastores.append(butler._datastore) 

2273 

2274 if not deleted: 

2275 # For a chained datastore we need to remove 

2276 # files in each chain. 

2277 for datastore in datastores: 

2278 # The file might not be known to the datastore 

2279 # if constraints are used. 

2280 try: 

2281 primary, uris = datastore.getURIs(ref) 

2282 except FileNotFoundError: 

2283 continue 

2284 if primary and primary.scheme != "mem": 

2285 primary.remove() 

2286 for uri in uris.values(): 

2287 if uri.scheme != "mem": 

2288 uri.remove() 

2289 n_expected -= 1 

2290 deleted.add(ref) 

2291 

2292 # Remove the datastore record. 

2293 for datastore in datastores: 

2294 if hasattr(datastore, "removeStoredItemInfo"): 

2295 datastore.removeStoredItemInfo(ref) 

2296 

2297 if index < 2: 

2298 source_refs.append(ref) 

2299 if ref not in deleted: 

2300 new_metric = butler.get(ref) 

2301 self.assertEqual(new_metric, metric) 

2302 

2303 # Create some bad dataset types to ensure we check for inconsistent 

2304 # definitions. 

2305 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2306 for datasetTypeName in datasetTypeNames: 

2307 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2308 self.target_butler.registry.registerDatasetType(datasetType) 

2309 with self.assertRaises(ConflictingDefinitionError) as cm: 

2310 self.target_butler.transfer_from(self.source_butler, source_refs) 

2311 self.assertIn("dataset type differs", str(cm.exception)) 

2312 

2313 # And remove the bad definitions. 

2314 for datasetTypeName in datasetTypeNames: 

2315 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2316 

2317 # Transfer without creating dataset types should fail. 

2318 with self.assertRaises(KeyError): 

2319 self.target_butler.transfer_from(self.source_butler, source_refs) 

2320 

2321 # Transfer without creating dimensions should fail. 

2322 with self.assertRaises(ConflictingDefinitionError) as cm: 

2323 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True) 

2324 self.assertIn("dimension", str(cm.exception)) 

2325 

2326 # The failed transfer above leaves registry in an inconsistent 

2327 # state because the run is created but then rolled back without 

2328 # the collection cache being cleared. For now force a refresh. 

2329 # Can remove with DM-35498. 

2330 self.target_butler.registry.refresh() 

2331 

2332 # Do a dry run -- this should not have any effect on the target butler. 

2333 self.target_butler.transfer_from(self.source_butler, source_refs, dry_run=True) 

2334 

2335 # Transfer the records for one ref to test the alternative API. 

2336 with self.assertLogs(logger="lsst", level=logging.DEBUG) as log_cm: 

2337 self.target_butler.transfer_dimension_records_from(self.source_butler, [source_refs[0]]) 

2338 self.assertIn("number of records transferred: 1", ";".join(log_cm.output)) 

2339 

2340 # Now transfer them to the second butler, including dimensions. 

2341 with self.assertLogs(logger="lsst", level=logging.DEBUG) as log_cm: 

2342 transferred = self.target_butler.transfer_from( 

2343 self.source_butler, 

2344 source_refs, 

2345 register_dataset_types=True, 

2346 transfer_dimensions=True, 

2347 ) 

2348 self.assertEqual(len(transferred), n_expected) 

2349 log_output = ";".join(log_cm.output) 

2350 

2351 # A ChainedDatastore will use the in-memory datastore for mexists 

2352 # so we cannot rely on the mexists log message. 

2353 self.assertIn("Number of datastore records found in source", log_output) 

2354 self.assertIn("Creating output run", log_output) 

2355 

2356 # Do the transfer twice to ensure that it will do nothing extra. 

2357 # Only do this if purge=True because it does not work for int 

2358 # dataset_id. 

2359 if purge: 

2360 # This should not need to register dataset types. 

2361 transferred = self.target_butler.transfer_from(self.source_butler, source_refs) 

2362 self.assertEqual(len(transferred), n_expected) 

2363 

2364 # Also do an explicit low-level transfer to trigger some 

2365 # edge cases. 

2366 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2367 self.target_butler._datastore.transfer_from(self.source_butler._datastore, source_refs) 

2368 log_output = ";".join(log_cm.output) 

2369 self.assertIn("no file artifacts exist", log_output) 

2370 

2371 with self.assertRaises((TypeError, AttributeError)): 

2372 self.target_butler._datastore.transfer_from(self.source_butler, source_refs) # type: ignore 

2373 

2374 with self.assertRaises(ValueError): 

2375 self.target_butler._datastore.transfer_from( 

2376 self.source_butler._datastore, source_refs, transfer="split" 

2377 ) 

2378 

2379 # Now try to get the same refs from the new butler. 

2380 for ref in source_refs: 

2381 if ref not in deleted: 

2382 new_metric = self.target_butler.get(ref) 

2383 old_metric = self.source_butler.get(ref) 

2384 self.assertEqual(new_metric, old_metric) 

2385 

2386 # Now prune run2 collection and create instead a CHAINED collection. 

2387 # This should block the transfer. 

2388 self.target_butler.removeRuns(["run2"], unstore=True) 

2389 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2390 with self.assertRaises(CollectionTypeError): 

2391 # Re-importing the run1 datasets can be problematic if they 

2392 # use integer IDs so filter those out. 

2393 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2394 self.target_butler.transfer_from(self.source_butler, to_transfer) 

2395 

2396 

2397class ChainedDatastoreTransfers(PosixDatastoreTransfers): 

2398 """Test transfers using a chained datastore.""" 

2399 

2400 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2401 

2402 

2403class NullDatastoreTestCase(unittest.TestCase): 

2404 """Test that we can fall back to a null datastore.""" 

2405 

2406 # Need a good config to create the repo. 

2407 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2408 storageClassFactory: StorageClassFactory 

2409 

2410 @classmethod 

2411 def setUpClass(cls) -> None: 

2412 cls.storageClassFactory = StorageClassFactory() 

2413 cls.storageClassFactory.addFromConfig(cls.configFile) 

2414 

2415 def setUp(self) -> None: 

2416 """Create a new butler root for each test.""" 

2417 self.root = makeTestTempDir(TESTDIR) 

2418 Butler.makeRepo(self.root, config=Config(self.configFile)) 

2419 

2420 def tearDown(self) -> None: 

2421 removeTestTempDir(self.root) 

2422 

2423 def test_fallback(self) -> None: 

2424 # Read the butler config and mess with the datastore section. 

2425 config_path = os.path.join(self.root, "butler.yaml") 

2426 bad_config = Config(config_path) 

2427 bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore" 

2428 bad_config.dumpToUri(config_path) 

2429 

2430 with self.assertRaises(RuntimeError): 

2431 Butler(self.root, without_datastore=False) 

2432 

2433 with self.assertRaises(RuntimeError): 

2434 Butler.from_config(self.root, without_datastore=False) 

2435 

2436 butler = Butler.from_config(self.root, writeable=True, without_datastore=True) 

2437 self.assertIsInstance(butler._datastore, NullDatastore) 

2438 

2439 # Check that registry is working. 

2440 butler.registry.registerRun("MYRUN") 

2441 collections = butler.registry.queryCollections(...) 

2442 self.assertIn("MYRUN", set(collections)) 

2443 

2444 # Create a ref. 

2445 dimensions = butler.dimensions.conform([]) 

2446 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

2447 datasetTypeName = "metric" 

2448 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2449 butler.registry.registerDatasetType(datasetType) 

2450 ref = DatasetRef(datasetType, {}, run="MYRUN") 

2451 

2452 # Check that datastore will complain. 

2453 with self.assertRaises(FileNotFoundError): 

2454 butler.get(ref) 

2455 with self.assertRaises(FileNotFoundError): 

2456 butler.getURI(ref) 

2457 

2458 

2459def setup_module(module: types.ModuleType) -> None: 

2460 """Set up the module for pytest.""" 

2461 clean_environment() 

2462 

2463 

2464if __name__ == "__main__": 

2465 clean_environment() 

2466 unittest.main()