
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import socket
import string
import tempfile
import time
import unittest
import unittest.mock


try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 cannot be imported."""
        return cls



try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

from tempfile import gettempdir
from threading import Thread


import astropy.time
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionSearch,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import ConflictingDefinitionError, MissingCollectionError
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import isWebdavEndpoint
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))



def makeExampleMetrics():
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )
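
# Note: judging from how the put/get tests below read the result back, the
# three positional arguments above appear to populate MetricsExample's
# ``summary``, ``output`` and ``data`` attributes, in that order (``data``
# being the sliceable list used by the parameter tests).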



class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass



class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in any other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")
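
        # The override works because directories given via ``searchPaths`` are
        # consulted ahead of the packaged defaults, so a config fragment with
        # the same relative name under ``testConfigs`` can replace individual
        # values such as the ``datastore.records.table`` key checked above.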



class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests from different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

137 @classmethod 

138 def setUpClass(cls): 

139 cls.storageClassFactory = StorageClassFactory() 

140 cls.storageClassFactory.addFromConfig(cls.configFile) 

141 

142 def assertGetComponents(self, butler, datasetRef, components, reference, collections=None): 

143 datasetType = datasetRef.datasetType 

144 dataId = datasetRef.dataId 

145 deferred = butler.getDirectDeferred(datasetRef) 

146 

147 for component in components: 

148 compTypeName = datasetType.componentTypeName(component) 

149 result = butler.get(compTypeName, dataId, collections=collections) 

150 self.assertEqual(result, getattr(reference, component)) 

151 result_deferred = deferred.get(component=component) 

152 self.assertEqual(result_deferred, result) 

153 

154 def tearDown(self): 

155 removeTestTempDir(self.root) 


    def create_butler(self, run, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add a second visit for some later tests
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 424,
                "name": "fourtwentyfour",
                "physical_filter": "d-r",
                "visit_system": 1,
            },
        )
        return butler, datasetType
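
    # After this setup, data IDs such as {"instrument": "DummyCamComp",
    # "visit": 423} (or visit 424) resolve against the registry; the put/get
    # tests below use exactly these values.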


    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest"
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # When path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler


    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))



class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")


    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run="ingest")
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        self.assertEqual(Butler.get_known_repos(), set())
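
    # For reference, the repository index dumped above is just a small
    # mapping of labels to repository URIs; its YAML form would look roughly
    # like this (a sketch):
    #
    #     label: /path/to/repo/butler.yaml
    #     bad_label: s3://bucket/not_real.yaml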


    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")


    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")


    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile, refs=refs, formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy", record_validation_info=False)

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)
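
    # The key idea in the second ingest above: one FileDataset may carry
    # several refs that point at the same file, with the formatter
    # (MultiDetectorFormatter here) extracting the right slice for each data
    # ID on read. A minimal sketch (refA/refB stand for any two DatasetRefs
    # resolving into the same file):
    #
    #     shared = FileDataset(path=metricFile, refs=[refA, refB],
    #                          formatter=MultiDetectorFormatter)
    #     butler.ingest(shared, transfer="copy")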


    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)


    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)


    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying can still return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )


    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)
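
    # The idiom exercised above is the standard transaction pattern: any
    # exception raised inside ``with butler.transaction():`` rolls back both
    # the registry inserts and the datastore writes made in the block, e.g.:
    #
    #     with butler.transaction():
    #         butler.put(metric, datasetTypeName, dataId)
    #         raise RuntimeError("nothing above survives this")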


    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)
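
    # Note: ``standalone=True`` above writes a fully expanded configuration
    # into the new repository rather than deferring to package defaults at
    # run time, which is why ``full`` contains keys (``self.fullConfigKey``)
    # that the unexpanded ``limited`` config lacks.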


    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run="ingest")

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # And check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)



class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()
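
    # Example: with a datastore rooted at ``/repo``, a call such as
    # ``self.checkFileExists(root, "ingest/metric1/file.pickle")`` asks
    # whether ``/repo/ingest/metric1/file.pickle`` physically exists (a
    # sketch; real relative paths come from the datastore file templates).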


    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(butler.datastore.root, "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(butler.datastore.root, "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)
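
    # For context (a sketch of the mechanism, not the exact defaults): file
    # datastores render each dataset's path from a file template that
    # substitutes fields such as the run, dataset type name and data ID
    # values, which is how "ingest/metric2/d-r/DummyCamComp_v423.pickle"
    # above is produced. A template that omits a required dimension would map
    # two data IDs to one filename, hence the FileTemplateValidationError.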


    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)


    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo.  It does not assume a posix
        datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements
                # even though there aren't any in these datasets or in the
                # database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )


    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())



class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"


    def testPathConstructor(self):
        """Independent test of constructor using PathLike."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        # And again with a Path object with the butler yaml
        path = pathlib.Path(self.tmpConfigFile)
        butler = Butler(path, writeable=False)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object without the butler yaml
        # (making sure we skip it if the tmp config doesn't end
        # in butler.yaml -- which is the case for a subclass)
        if self.tmpConfigFile.endswith("butler.yaml"):
            path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
            butler = Butler(path, writeable=False)
            self.assertIsInstance(butler, Butler)


    def testExportTransferCopy(self):
        """Test local export using all transfer modes."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(
                        self.checkFileExists(exportDir, path),
                        f"Check that mode {transfer} exported files",
                    )

1306 
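# Editor's sketch of the export pattern tested above (``exportDir`` and
# ``datasets`` as in the loop body):
#
#     with butler.export(directory=exportDir, format="yaml", transfer="copy") as export:
#         export.saveDatasets(datasets)
#
# "copy" duplicates each artifact into the export directory, while the
# link variants create hard or (relative) symbolic links instead.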

1307 def testPruneDatasets(self): 

1308 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1309 butler = Butler(self.tmpConfigFile, writeable=True) 

1310 # Load registry data with dimensions to hang datasets off of. 

1311 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1312 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1313 # Add some RUN-type collections. 

1314 run1 = "run1" 

1315 butler.registry.registerRun(run1) 

1316 run2 = "run2" 

1317 butler.registry.registerRun(run2) 

1318 # put some datasets. ref1 and ref2 have the same data ID, and are in 

1319 # different runs. ref3 has a different data ID. 

1320 metric = makeExampleMetrics() 

1321 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1322 datasetType = self.addDatasetType( 

1323 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1324 ) 

1325 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1326 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1327 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1328 

1329 # Simple prune. 

1330 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1331 with self.assertRaises(LookupError): 

1332 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1333 

1334 # Put data back. 

1335 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1336 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1337 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1338 

1339 # Check that in normal (non-trust) mode, deleting the datastore 

1340 # record first means trash will not touch the file. 

1341 uri1 = butler.datastore.getURI(ref1) 

1342 butler.datastore.bridge.moveToTrash([ref1]) # Update the dataset_location table 

1343 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1344 butler.datastore.trash(ref1) 

1345 butler.datastore.emptyTrash() 

1346 self.assertTrue(uri1.exists()) 

1347 uri1.remove() # Clean it up. 

1348 

1349 # Simulate execution butler setup by deleting the datastore 

1350 # record but keeping the file around and trusting. 

1351 butler.datastore.trustGetRequest = True 

1352 uri2 = butler.datastore.getURI(ref2) 

1353 uri3 = butler.datastore.getURI(ref3) 

1354 self.assertTrue(uri2.exists()) 

1355 self.assertTrue(uri3.exists()) 

1356 

1357 # Remove the datastore record. 

1358 butler.datastore.bridge.moveToTrash([ref2]) # Update the dataset_location table 

1359 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1360 self.assertTrue(uri2.exists()) 

1361 butler.datastore.trash([ref2, ref3]) 

1362 # Immediate removal for ref2 file 

1363 self.assertFalse(uri2.exists()) 

1364 # But ref3 has to wait for the empty. 

1365 self.assertTrue(uri3.exists()) 

1366 butler.datastore.emptyTrash() 

1367 self.assertFalse(uri3.exists()) 

1368 

1369 # Clear out the datasets from registry. 

1370 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1371 
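# Editor's note: deletion above is two-phase -- trash() marks refs and
# emptyTrash() removes the artifacts -- except that when the datastore
# has no record of a ref (and trustGetRequest is set), trash() removes
# the file immediately, which is why uri2 disappears before emptyTrash()
# while uri3 survives until after it.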

1372 def testPytypePutCoercion(self): 

1373 """Test python type coercion on Butler.get and put.""" 

1374 

1375 # Store some data with the normal example storage class. 

1376 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1377 datasetTypeName = "test_metric" 

1378 butler, _ = self.create_butler("ingest", storageClass, datasetTypeName) 

1379 

1380 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1381 

1382 # Put a dict and this should coerce to a MetricsExample 

1383 test_dict = {"summary": {"a": 1}, "output": {"b": 2}} 

1384 metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424) 

1385 test_metric = butler.getDirect(metric_ref) 

1386 self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample") 

1387 self.assertEqual(test_metric.summary, test_dict["summary"]) 

1388 self.assertEqual(test_metric.output, test_dict["output"]) 

1389 

1390 def testPytypeCoercion(self): 

1391 """Test python type coercion on Butler.get and put.""" 

1392 

1393 # Store some data with the normal example storage class. 

1394 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1395 datasetTypeName = "test_metric" 

1396 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1397 

1398 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1399 metric = butler.get(datasetTypeName, dataId=dataId) 

1400 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1401 

1402 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1403 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1404 

1405 # Now need to hack the registry dataset type definition. 

1406 # There is no API for this. 
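# (Editor's note: in Database.update the ``where`` mapping pairs column
# names with the row keys holding their comparison values, hence the
# odd-looking ``{datasetTypeName: datasetTypeName}`` entry below.)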

1407 manager = butler.registry._managers.datasets 

1408 manager._db.update( 

1409 manager._static.dataset_type, 

1410 {"name": datasetTypeName}, 

1411 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1412 ) 

1413 

1414 # Force reset of dataset type cache 

1415 butler.registry.refresh() 

1416 

1417 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1418 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1419 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1420 

1421 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1422 self.assertNotEqual(type(metric_model), type(metric)) 

1423 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1424 

1425 # Put the model and read it back to show that everything now 

1426 # works as normal. 

1427 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1428 metric_model_new = butler.get(metric_ref) 

1429 self.assertEqual(metric_model_new, metric_model) 

1430 

1431 # Hack the storage class again to something that will fail on the 

1432 # get with no conversion class. 

1433 manager._db.update( 

1434 manager._static.dataset_type, 

1435 {"name": datasetTypeName}, 

1436 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1437 ) 

1438 butler.registry.refresh() 

1439 

1440 with self.assertRaises(ValueError): 

1441 butler.get(datasetTypeName, dataId=dataId) 

1442 

1443 

1444class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1445 """InMemoryDatastore specialization of a butler""" 

1446 

1447 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1448 fullConfigKey = None 

1449 useTempRoot = False 

1450 validationCanFail = False 

1451 datastoreStr = ["datastore='InMemory"] 

1452 datastoreName = ["InMemoryDatastore@"] 

1453 registryStr = "/gen3.sqlite3" 

1454 

1455 def testIngest(self): 

1456 pass  # Ingest of external files does not apply to an in-memory datastore. 

1457 

1458 

1459class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1460 """PosixDatastore specialization""" 

1461 

1462 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1463 fullConfigKey = ".datastore.datastores.1.formatters" 

1464 validationCanFail = True 

1465 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1466 datastoreName = [ 

1467 "InMemoryDatastore@", 

1468 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1469 "SecondDatastore", 

1470 ] 

1471 registryStr = "/gen3.sqlite3" 

1472 

1473 

1474class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1475 """Test that a yaml file in one location can refer to a root in another.""" 

1476 

1477 datastoreStr = ["dir1"] 

1478 # Disable the makeRepo test since we are deliberately not using 

1479 # butler.yaml as the config name. 

1480 fullConfigKey = None 

1481 

1482 def setUp(self): 

1483 self.root = makeTestTempDir(TESTDIR) 

1484 

1485 # Make a new repository in one place 

1486 self.dir1 = os.path.join(self.root, "dir1") 

1487 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1488 

1489 # Move the yaml file to a different place and add a "root" 

1490 self.dir2 = os.path.join(self.root, "dir2") 

1491 os.makedirs(self.dir2, exist_ok=True) 

1492 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1493 config = Config(configFile1) 

1494 config["root"] = self.dir1 

1495 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1496 config.dumpToUri(configFile2) 

1497 os.remove(configFile1) 

1498 self.tmpConfigFile = configFile2 

1499 
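# Editor's sketch: once "root" is recorded in the YAML, a client needs
# only the relocated config file, e.g.
#
#     butler = Butler(os.path.join(self.dir2, "butler2.yaml"))
#
# and all repository paths resolve against dir1.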

1500 def testFileLocations(self): 

1501 self.assertNotEqual(self.dir1, self.dir2) 

1502 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1503 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1504 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1505 

1506 

1507class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1508 """Test that a config file created by makeRepo outside of repo works.""" 

1509 

1510 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1511 

1512 def setUp(self): 

1513 self.root = makeTestTempDir(TESTDIR) 

1514 self.root2 = makeTestTempDir(TESTDIR) 

1515 

1516 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1517 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1518 

1519 def tearDown(self): 

1520 if os.path.exists(self.root2): 

1521 shutil.rmtree(self.root2, ignore_errors=True) 

1522 super().tearDown() 

1523 

1524 def testConfigExistence(self): 

1525 c = Config(self.tmpConfigFile) 

1526 uri_config = ResourcePath(c["root"]) 

1527 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1528 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1529 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1530 

1531 def testPutGet(self): 

1532 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1533 self.runPutGetTest(storageClass, "test_metric") 

1534 

1535 

1536class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1537 """Test that a config file created by makeRepo outside of repo works.""" 

1538 

1539 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1540 

1541 def setUp(self): 

1542 self.root = makeTestTempDir(TESTDIR) 

1543 self.root2 = makeTestTempDir(TESTDIR) 

1544 

1545 self.tmpConfigFile = self.root2 

1546 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1547 

1548 def testConfigExistence(self): 

1549 # Append the yaml file name, since otherwise the Config constructor 

1550 # cannot determine the file type. 

1551 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1552 super().testConfigExistence() 

1553 

1554 

1555class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1556 """Test that a config file created by makeRepo outside of repo works.""" 

1557 

1558 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1559 

1560 def setUp(self): 

1561 self.root = makeTestTempDir(TESTDIR) 

1562 self.root2 = makeTestTempDir(TESTDIR) 

1563 

1564 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1565 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1566 

1567 

1568@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1569@mock_s3 

1570class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1571 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1572 a local in-memory SqlRegistry. 

1573 """ 

1574 

1575 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1576 fullConfigKey = None 

1577 validationCanFail = True 

1578 

1579 bucketName = "anybucketname" 

1580 """Name of the Bucket that will be used in the tests. The name is read from 

1581 the config file used with the tests during set-up. 

1582 """ 

1583 

1584 root = "butlerRoot/" 

1585 """Root repository directory expected to be used in case useTempRoot=False. 

1586 Otherwise the root is set to a 20 characters long randomly generated string 

1587 during set-up. 

1588 """ 

1589 

1590 datastoreStr = [f"datastore={root}"] 

1591 """Contains all expected root locations in a format expected to be 

1592 returned by Butler stringification. 

1593 """ 

1594 

1595 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1596 """The expected format of the S3 Datastore string.""" 

1597 

1598 registryStr = "/gen3.sqlite3" 

1599 """Expected format of the Registry string.""" 

1600 

1601 def genRoot(self): 

1602 """Returns a random string of len 20 to serve as a root 

1603 name for the temporary bucket repo. 

1604 

1605 This is equivalent to tempfile.mkdtemp as this is what self.root 

1606 becomes when useTempRoot is True. 

1607 """ 

1608 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1609 return rndstr + "/" 

1610 

1611 def setUp(self): 

1612 config = Config(self.configFile) 

1613 uri = ResourcePath(config[".datastore.datastore.root"]) 

1614 self.bucketName = uri.netloc 

1615 

1616 # set up some fake credentials if they do not exist 

1617 self.usingDummyCredentials = setAwsEnvCredentials() 

1618 

1619 if self.useTempRoot: 

1620 self.root = self.genRoot() 

1621 rooturi = f"s3://{self.bucketName}/{self.root}" 

1622 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1623 

1624 # need local folder to store registry database 

1625 self.reg_dir = makeTestTempDir(TESTDIR) 

1626 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1627 

1628 # MOTO needs to know that we expect Bucket bucketname to exist 

1629 # (this used to be the class attribute bucketName) 

1630 s3 = boto3.resource("s3") 

1631 s3.create_bucket(Bucket=self.bucketName) 

1632 

1633 self.datastoreStr = f"datastore={self.root}" 

1634 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1635 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1636 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1637 
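# Editor's note: every S3 call in this class is served by moto's
# in-process mock (enabled by the @mock_s3 class decorator); no real
# AWS endpoint or credentials are involved.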

1638 def tearDown(self): 

1639 s3 = boto3.resource("s3") 

1640 bucket = s3.Bucket(self.bucketName) 

1641 try: 

1642 bucket.objects.all().delete() 

1643 except botocore.exceptions.ClientError as e: 

1644 if e.response["Error"]["Code"] == "404": 

1645 # the key was not reachable - pass 

1646 pass 

1647 else: 

1648 raise 

1649 

1650 bucket = s3.Bucket(self.bucketName) 

1651 bucket.delete() 

1652 

1653 # unset any potentially set dummy credentials 

1654 if self.usingDummyCredentials: 

1655 unsetAwsEnvCredentials() 

1656 

1657 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1658 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1659 

1660 if self.useTempRoot and os.path.exists(self.root): 

1661 shutil.rmtree(self.root, ignore_errors=True) 

1662 

1663 

1664@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!") 

1665# Mock required environment variables during tests 

1666@unittest.mock.patch.dict( 

1667 os.environ, 

1668 { 

1669 "LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1670 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"), 

1671 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs", 

1672 }, 

1673) 

1674class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1675 """WebdavDatastore specialization of a butler; a Webdav storage Datastore + 

1676 a local in-memory SqlRegistry. 

1677 """ 

1678 

1679 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml") 

1680 fullConfigKey = None 

1681 validationCanFail = True 

1682 

1683 serverName = "localhost" 

1684 """Name of the server that will be used in the tests. 

1685 """ 

1686 

1687 portNumber = 8080 

1688 """Port on which the webdav server listens. Automatically chosen 

1689 at setUpClass via the _getfreeport() method. 

1690 """ 

1691 

1692 root = "butlerRoot/" 

1693 """Root repository directory expected to be used in case useTempRoot=False. 

1694 Otherwise the root is set to a 20 characters long randomly generated string 

1695 during set-up. 

1696 """ 

1697 

1698 datastoreStr = [f"datastore={root}"] 

1699 """Contains all expected root locations in a format expected to be 

1700 returned by Butler stringification. 

1701 """ 

1702 

1703 datastoreName = ["FileDatastore@https://{serverName}/{root}"] 

1704 """The expected format of the WebdavDatastore string.""" 

1705 

1706 registryStr = "/gen3.sqlite3" 

1707 """Expected format of the Registry string.""" 

1708 

1709 serverThread = None 

1710 """Thread in which the local webdav server will run""" 

1711 

1712 stopWebdavServer = False 

1713 """This flag will cause the webdav server to 

1714 gracefully shut down when True 

1715 """ 

1716 

1717 def genRoot(self): 

1718 """Returns a random string of len 20 to serve as a root 

1719 name for the temporary bucket repo. 

1720 

1721 This is equivalent to tempfile.mkdtemp as this is what self.root 

1722 becomes when useTempRoot is True. 

1723 """ 

1724 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1725 return rndstr + "/" 

1726 

1727 @classmethod 

1728 def setUpClass(cls): 

1729 # Do the same as inherited class 

1730 cls.storageClassFactory = StorageClassFactory() 

1731 cls.storageClassFactory.addFromConfig(cls.configFile) 

1732 

1733 cls.portNumber = cls._getfreeport() 

1734 # Run a local webdav server on which tests will be run 

1735 cls.serverThread = Thread( 

1736 target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True 

1737 ) 

1738 cls.serverThread.start() 

1739 # Wait for it to start 

1740 time.sleep(3) 

1741 

1742 @classmethod 

1743 def tearDownClass(cls): 

1744 # Ask for graceful shut down of the webdav server 

1745 cls.stopWebdavServer = True 

1746 # Wait for the thread to exit 

1747 cls.serverThread.join() 

1748 

1749 # Mock required environment variables during tests 

1750 @unittest.mock.patch.dict( 

1751 os.environ, 

1752 { 

1753 "LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1754 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"), 

1755 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs", 

1756 }, 

1757 ) 

1758 def setUp(self): 

1759 config = Config(self.configFile) 

1760 

1761 if self.useTempRoot: 

1762 self.root = self.genRoot() 

1763 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}" 

1764 config.update({"datastore": {"datastore": {"root": self.rooturi}}}) 

1765 

1766 # need local folder to store registry database 

1767 self.reg_dir = makeTestTempDir(TESTDIR) 

1768 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1769 

1770 self.datastoreStr = f"datastore={self.root}" 

1771 self.datastoreName = [f"FileDatastore@{self.rooturi}"] 

1772 

1773 if not isWebdavEndpoint(self.rooturi): 

1774 raise OSError("Webdav server not running properly: cannot run tests.") 

1775 

1776 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False) 

1777 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml") 

1778 

1779 # Mock required environment variables during tests 

1780 @unittest.mock.patch.dict( 

1781 os.environ, 

1782 { 

1783 "LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1784 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"), 

1785 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs", 

1786 }, 

1787 ) 

1788 def tearDown(self): 

1789 # Clear temporary directory 

1790 ResourcePath(self.rooturi).remove() 

1791 ResourcePath(self.rooturi).session.close() 

1792 

1793 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1794 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1795 

1796 if self.useTempRoot and os.path.exists(self.root): 

1797 shutil.rmtree(self.root, ignore_errors=True) 

1798 

1799 def _serveWebdav(self, port: int, stopWebdavServer): 

1800 """Starts a local webdav-compatible HTTP server, 

1801 Listening on http://localhost:port 

1802 This server only runs when this test class is instantiated, 

1803 and then shuts down. Must be started is a separate thread. 

1804 

1805 Parameters 

1806 ---------- 

1807 port : `int` 

1808 The port number on which the server should listen. 

1809 """ 

1810 root_path = gettempdir() 
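# Serve the system temporary directory as the WebDAV root ("/").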

1811 

1812 config = { 

1813 "host": "0.0.0.0", 

1814 "port": port, 

1815 "provider_mapping": {"/": root_path}, 

1816 "http_authenticator": {"domain_controller": None}, 

1817 "simple_dc": {"user_mapping": {"*": True}}, 

1818 "verbose": 0, 

1819 } 

1820 app = WsgiDAVApp(config) 

1821 

1822 server_args = { 

1823 "bind_addr": (config["host"], config["port"]), 

1824 "wsgi_app": app, 

1825 } 

1826 server = wsgi.Server(**server_args) 

1827 server.prepare() 

1828 

1829 try: 

1830 # Start the actual server in a separate thread 

1831 t = Thread(target=server.serve, daemon=True) 

1832 t.start() 

1833 # watch stopWebdavServer, and gracefully 

1834 # shut down the server when True 

1835 while True: 

1836 if stopWebdavServer(): 

1837 break 

1838 time.sleep(1) 

1839 except KeyboardInterrupt: 

1840 print("Caught Ctrl-C, shutting down...") 

1841 finally: 

1842 server.stop() 

1843 t.join() 

1844 

1845 def _getfreeport():  # Called via the class, so no self/cls parameter. 

1846 """ 

1847 Determine a free port using sockets. 

1848 """ 

1849 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

1850 free_socket.bind(("0.0.0.0", 0)) 
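# Binding to port 0 asks the OS to assign any free ephemeral port.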

1851 free_socket.listen() 

1852 port = free_socket.getsockname()[1] 

1853 free_socket.close() 

1854 return port 

1855 

1856 

1857class PosixDatastoreTransfers(unittest.TestCase): 

1858 """Test data transfers between butlers. 

1859 

1860 Test for different managers. UUID to UUID and integer to integer are 

1861 tested. UUID to integer is not supported since we do not currently 

1862 want to allow that. Integer to UUID is supported with the caveat 

1863 that UUID4 will be generated and this will be incorrect for raw 

1864 dataset types. The test ignores that. 

1865 """ 

1866 
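# Editor's sketch: the central call these tests exercise is, schematically,
#
#     transferred = target_butler.transfer_from(
#         source_butler, source_refs, register_dataset_types=True
#     )
#
# where source_refs are resolved refs from the source registry and the
# dataset types must already exist in (or be registered into) the target.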

1867 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1868 

1869 @classmethod 

1870 def setUpClass(cls): 

1871 cls.storageClassFactory = StorageClassFactory() 

1872 cls.storageClassFactory.addFromConfig(cls.configFile) 

1873 

1874 def setUp(self): 

1875 self.root = makeTestTempDir(TESTDIR) 

1876 self.config = Config(self.configFile) 

1877 

1878 def tearDown(self): 

1879 removeTestTempDir(self.root) 

1880 

1881 def create_butler(self, manager, label): 

1882 config = Config(self.configFile) 

1883 config["registry", "managers", "datasets"] = manager 

1884 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

1885 

1886 def create_butlers(self, manager1, manager2): 

1887 self.source_butler = self.create_butler(manager1, "1") 

1888 self.target_butler = self.create_butler(manager2, "2") 

1889 

1890 def testTransferUuidToUuid(self): 

1891 self.create_butlers( 

1892 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1893 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1894 ) 

1895 # Setting id_gen_map should have no effect here 
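# (DATAID_TYPE requests deterministic IDs derived from the dataset type
# and data ID; with UUID managers on both sides the original UUIDs are
# carried over, so the map is ignored.)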

1896 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1897 

1898 def testTransferIntToInt(self): 

1899 self.create_butlers( 

1900 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1901 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1902 ) 

1903 # int dataset ID only allows UNIQUE 

1904 self.assertButlerTransfers() 

1905 

1906 def testTransferIntToUuid(self): 

1907 self.create_butlers( 

1908 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1909 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1910 ) 

1911 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1912 

1913 def testTransferMissing(self): 

1914 """Test transfers where datastore records are missing. 

1915 

1916 This is how execution butler works. 

1917 """ 

1918 self.create_butlers( 

1919 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1920 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1921 ) 

1922 

1923 # Configure the source butler to allow trust. 

1924 self.source_butler.datastore.trustGetRequest = True 
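# (trustGetRequest lets the source datastore hand out artifacts for
# which its records have been deleted, mimicking an execution butler.)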

1925 

1926 self.assertButlerTransfers(purge=True) 

1927 

1928 def testTransferMissingDisassembly(self): 

1929 """Test transfers where datastore records are missing. 

1930 

1931 This is how execution butler works. 

1932 """ 

1933 self.create_butlers( 

1934 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1935 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1936 ) 

1937 

1938 # Configure the source butler to allow trust. 

1939 self.source_butler.datastore.trustGetRequest = True 

1940 

1941 # Test disassembly. 

1942 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

1943 

1944 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

1945 """Test that a run can be transferred to another butler.""" 

1946 

1947 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1948 datasetTypeName = "random_data" 

1949 

1950 # Test will create 3 collections and we will want to transfer 

1951 # two of those three. 

1952 runs = ["run1", "run2", "other"] 

1953 

1954 # Also want to use two different dataset types to ensure that 

1955 # grouping works. 

1956 datasetTypeNames = ["random_data", "random_data_2"] 

1957 

1958 # Create the run collections in the source butler. 

1959 for run in runs: 

1960 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1961 

1962 # Create dimensions in both butlers (transfer will not create them). 

1963 n_exposures = 30 

1964 for butler in (self.source_butler, self.target_butler): 

1965 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1966 butler.registry.insertDimensionData( 

1967 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1968 ) 

1969 butler.registry.insertDimensionData( 

1970 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

1971 ) 

1972 

1973 for i in range(n_exposures): 

1974 butler.registry.insertDimensionData( 

1975 "exposure", 

1976 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

1977 ) 

1978 

1979 # Create dataset types in the source butler. 

1980 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"]) 

1981 for datasetTypeName in datasetTypeNames: 

1982 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

1983 self.source_butler.registry.registerDatasetType(datasetType) 

1984 

1985 # Write a dataset to an unrelated run -- this will ensure that 

1986 # we are rewriting integer dataset ids in the target if necessary. 

1987 # Will not be relevant for UUID. 

1988 run = "distraction" 

1989 butler = Butler(butler=self.source_butler, run=run) 

1990 butler.put( 

1991 makeExampleMetrics(), 

1992 datasetTypeName, 

1993 exposure=1, 

1994 instrument="DummyCamComp", 

1995 physical_filter="d-r", 

1996 ) 

1997 

1998 # Write some example metrics to the source 

1999 butler = Butler(butler=self.source_butler) 

2000 

2001 # Set of DatasetRefs that should be in the list of refs to transfer 

2002 # but which will not be transferred. 

2003 deleted = set() 

2004 

2005 n_expected = 20 # Number of datasets expected to be transferred 

2006 source_refs = [] 

2007 for i in range(n_exposures): 

2008 # Put a third of the datasets into each collection; only retain 

2009 # two thirds of them overall. 

2010 index = i % 3 

2011 run = runs[index] 

2012 datasetTypeName = datasetTypeNames[i % 2] 

2013 

2014 metric_data = { 

2015 "summary": {"counter": i}, 

2016 "output": {"text": "metric"}, 

2017 "data": [2 * x for x in range(i)], 

2018 } 

2019 metric = MetricsExample(**metric_data) 

2020 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2021 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2022 

2023 # Remove the datastore record using low-level API 

2024 if purge: 

2025 # Remove records for a fraction. 

2026 if index == 1: 

2027 

2028 # For one of these delete the file as well. 

2029 # This allows the "missing" code to filter the 

2030 # file out. 

2031 if not deleted: 

2032 primary, uris = butler.datastore.getURIs(ref) 

2033 if primary: 

2034 primary.remove() 

2035 for uri in uris.values(): 

2036 uri.remove() 

2037 n_expected -= 1 

2038 deleted.add(ref) 

2039 

2040 # Remove the datastore record. 

2041 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

2042 

2043 if index < 2: 

2044 source_refs.append(ref) 

2045 if ref not in deleted: 

2046 new_metric = butler.get(ref.unresolved(), collections=run) 

2047 self.assertEqual(new_metric, metric) 

2048 

2049 # Create some bad dataset types to ensure we check for inconsistent 

2050 # definitions. 

2051 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2052 for datasetTypeName in datasetTypeNames: 

2053 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2054 self.target_butler.registry.registerDatasetType(datasetType) 

2055 with self.assertRaises(ConflictingDefinitionError): 

2056 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2057 # And remove the bad definitions. 

2058 for datasetTypeName in datasetTypeNames: 

2059 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2060 

2061 # Transfer without creating dataset types should fail. 

2062 with self.assertRaises(KeyError): 

2063 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2064 

2065 # Now transfer them to the second butler 

2066 with self.assertLogs(level=logging.DEBUG) as cm: 

2067 transferred = self.target_butler.transfer_from( 

2068 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

2069 ) 

2070 self.assertEqual(len(transferred), n_expected) 

2071 log_output = ";".join(cm.output) 

2072 self.assertIn("found in datastore for chunk", log_output) 

2073 self.assertIn("Creating output run", log_output) 

2074 

2075 # Do the transfer twice to ensure that it will do nothing extra. 

2076 # Only do this if purge=True because it does not work for int 

2077 # dataset_id. 

2078 if purge: 

2079 # This should not need to register dataset types. 

2080 transferred = self.target_butler.transfer_from( 

2081 self.source_butler, source_refs, id_gen_map=id_gen_map 

2082 ) 

2083 self.assertEqual(len(transferred), n_expected) 

2084 

2085 # Also do an explicit low-level transfer to trigger some 

2086 # edge cases. 

2087 with self.assertLogs(level=logging.DEBUG) as cm: 

2088 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2089 log_output = ";".join(cm.output) 

2090 self.assertIn("no file artifacts exist", log_output) 

2091 

2092 with self.assertRaises(TypeError): 

2093 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2094 

2095 with self.assertRaises(ValueError): 

2096 self.target_butler.datastore.transfer_from( 

2097 self.source_butler.datastore, source_refs, transfer="split" 

2098 ) 

2099 

2100 # Now try to get the same refs from the new butler. 

2101 for ref in source_refs: 

2102 if ref not in deleted: 

2103 unresolved_ref = ref.unresolved() 

2104 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2105 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2106 self.assertEqual(new_metric, old_metric) 

2107 

2108 # Now prune run2 collection and create instead a CHAINED collection. 

2109 # This should block the transfer. 

2110 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

2111 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2112 with self.assertRaises(TypeError): 

2113 # Re-importing the run1 datasets can be problematic if they 

2114 # use integer IDs so filter those out. 

2115 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2116 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 

2117 

2118 

2119 if __name__ == "__main__": 

2120 unittest.main()