
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import os
import unittest
import shutil
import yaml
import tempfile
import lsst.utils.tests

from lsst.utils import doImport

from lsst.daf.butler import StorageClassFactory, StorageClass, DimensionUniverse, FileDataset
from lsst.daf.butler import DatastoreConfig, DatasetTypeNotSupportedError, DatastoreValidationError
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler import (DatastoreCacheManager, DatastoreDisabledCacheManager,
                             DatastoreCacheManagerConfig, Config, ButlerURI)

from lsst.daf.butler.tests import (DatasetTestHelper, DatastoreTestHelper, BadWriteFormatter,
                                   BadNoWriteFormatter, MetricsExample, DummyRegistry)


TESTDIR = os.path.dirname(__file__)



def makeExampleMetrics(use_none=False):
    """Make an example MetricsExample, optionally with a None data array."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          array,
                          )



class TransactionTestError(Exception):
    """Specific error for transaction tests, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass



class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""
    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)



class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

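    # Concrete subclasses provide the class attributes these tests rely
    # on: configFile, uriScheme, ingestTransferModes, isEphemeral,
    # rootKeys, validationCanFail and, where ingest "auto" mode is
    # supported, canIngestNoTransferAuto.
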

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])


    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)


    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])


    def testParameterValidation(self):
        """Check that parameters are validated."""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})


    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredData",
                                     "StructuredDataJson",
                                     "StructuredDataPickle")]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            print("Using storageClass: {}".format(sc.name))
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with the parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

            # Remember the last storage class used; it is needed again below
            storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existent file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)


    def testTrustGetRequest(self):
        """Check that we can get datasets that the registry knows nothing
        about.
        """

        datastore = self.makeDatastore()

        # Skip the test if the datastore does not support trust mode
        if not hasattr(datastore, "trustGetRequest"):
            return

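        # With trustGetRequest enabled, the datastore looks for the
        # artifacts themselves even when it has no internal record of a
        # dataset; with it disabled, it relies entirely on its stored
        # information.
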

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"metric{i}"

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start the datastore in its default configuration, relying on
            # its internal records
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))
            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the datastore's internal record of the dataset, so
            # that only trust mode will be able to find it
            datastore.removeStoredItemInfo(ref)

            # While trust is still disabled, check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI request should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- in trust mode this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)


    def testDisassembly(self):
        """Test disassembly within the datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # The in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble, to provide
        # a reference.
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     "StructuredCompositeReadComp",
                                     "StructuredData",  # No disassembly
                                     "StructuredCompositeReadCompNoDisassembly",
                                     )]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures caused by file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId,
                                          conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with a read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes, attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)


    def testRegistryCompositePutGet(self):
        """Test the case where the registry disassembles and puts to the
        datastore.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     )]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print("Using storageClass: {}".format(sc.name))
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId,
                                      conform=False)

            components = sc.delegate().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                compRef = self.makeDatasetRef(ref.datasetType.componentTypeName(compName), dimensions,
                                              components[compName].storageClass, dataId,
                                              conform=False)

                print("Writing component {} with {}".format(compName, compRef.datasetType.storageClass.name))
                datastore.put(compInfo.component, compRef)

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # We can generate identical files for each storage class
                # so remove the component here
                datastore.remove(compRef)

            # Combine all the components we read back into a new composite
            metricsOut = sc.delegate().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)


    def prepDeleteTest(self):
        """Put a dataset and check it exists, returning the datastore and
        ref for use in deletion tests."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 638, "physical_filter": "U"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.put(metrics, ref)

        # Does it exist?
        self.assertTrue(datastore.exists(ref))

        # Get
        metricsOut = datastore.get(ref)
        self.assertEqual(metrics, metricsOut)

        return datastore, ref


    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)


    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # The predicted URI should still point to the file.
        self.assertTrue(uri.exists())


    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)


    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [(self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                 makeExampleMetrics(),)
                for dataId in dataIds]
        succeed = data[:nDatasets//2]
        fail = data[nDatasets//2:]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existent file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)


    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                        conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                               conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                                   conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)


    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False, but then the new symlink will fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, 'w') as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)


    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer.
        """
        for mode in (None, "auto"):

            # Some datastores have "auto" but cannot do an in-place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # First move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(FileDataset(path="this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of the datastore root
                    unless the mode is "auto"."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)


    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them.
        """
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # the datastore for "auto" mode
                        datastore.ingest(FileDataset(path="../this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failOutputExists(obj, path, ref):
                    """Can't ingest files if the transfer destination already
                    exists."""
                    with self.assertRaises(FileExistsError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                    self.runIngestTest(failOutputExists)
                else:
                    self.runIngestTest(failNotImplemented)


    def testIngestSymlinkOfSymlink(self):
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, 'w') as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store
                    datastore.remove(ref)



class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()



class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""
    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                  conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove the dataset and put it back with checksums explicitly
        # enabled
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)



class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml",
                         f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails and
        # leaves a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey-patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter,
                                                             overwrite=True)

                # Try to put the dataset; it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(),
                                f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter,
                                                     overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False



class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
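    # The dotted keys above address the root entries of the individual
    # child datastores inside the chained configuration.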

    validationCanFail = True



class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False



class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of datastores."""

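    # Concrete subclasses define configFile and the canIngest flag used
    # below.
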

    def testConstraints(self):
        """Test the constraints model. Assumes that each test class has
        the same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty files suitable for ingest checks (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (("metric", sc1, True), ("metric2", sc1, False),
                                              ("metric33", sc1, True), ("metric2", sc2, True)):
            # Choose a different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))



class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()



class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False



class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore and constraints
    at the ChainedDatastore level."""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")



class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")



class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False



class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per datastore,
    even if a child datastore would otherwise accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


    def testConstraints(self):
        """Test the chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty files suitable for ingest checks (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (("metric", dataId1, sc1, (False, True, False), True),
                                                     ("metric2", dataId1, sc1, (False, False, False), False),
                                                     ("metric2", dataId2, sc1, (True, False, False), False),
                                                     ("metric33", dataId2, sc2, (True, True, False), True),
                                                     ("metric2", dataId1, sc2, (False, True, False), True)):

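            # Each "accept" entry is a tuple of expected acceptance flags,
            # one per child datastore in the chained configuration.
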

            # Choose a different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId,
                                          conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(childDatastore.exists(ref), expected,
                                         f"Testing presence of {ref} in datastore {childDatastore.name}")

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral currently means InMemory, which
                            # does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(childDatastore.exists(ref), expected,
                                             f"Testing presence of ingested {ref} in datastore"
                                             f" {childDatastore.name}")

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))



class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for the datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create a list of refs and a list of temporary files
        n_datasets = 2
        self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId,
                                         conform=False) for n in range(n_datasets)]

        root_uri = ButlerURI(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create empty files
        for uri in self.files:
            uri.write(b"")

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

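    # The YAML configurations used below have a "cached" section: "root"
    # names the cache directory (with null, no directory is set up front
    # and the tests below expect a temporary one to be created on demand)
    # and "cacheable" maps dataset type names to whether their artifacts
    # should be cached.
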

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory,
                         ButlerURI(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # The cached file should no longer exist but the uncached file
        # should be unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file, and it should be within the cache directory.
        found = cache_manager.find_in_cache(self.refs[0], ".txt")
        self.assertTrue(found.exists())
        self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in the cache
        self.assertIsNone(cache_manager.find_in_cache(self.refs[0], ".fits"))
        self.assertIsNone(cache_manager.find_in_cache(self.refs[1], ".fits"))

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertIsNone(cache_manager.find_in_cache(ref, ".txt"))



if __name__ == "__main__":
    unittest.main()