Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import os 

23import unittest 

24import shutil 

25import yaml 

26import tempfile 

27import lsst.utils.tests 

28 

29from lsst.utils import doImport 

30 

31from lsst.daf.butler import StorageClassFactory, StorageClass, DimensionUniverse, FileDataset 

32from lsst.daf.butler import DatastoreConfig, DatasetTypeNotSupportedError, DatastoreValidationError 

33from lsst.daf.butler.formatters.yaml import YamlFormatter 

34 

35from lsst.daf.butler.tests import (DatasetTestHelper, DatastoreTestHelper, BadWriteFormatter, 

36 BadNoWriteFormatter, MetricsExample, DummyRegistry) 

37 

38 

# Directory containing this test module; used to locate the test
# configuration files under config/basic/.
TESTDIR = os.path.dirname(__file__)

40 

41 

def makeExampleMetrics(use_none=False):
    """Create a populated example `MetricsExample`.

    Parameters
    ----------
    use_none : `bool`, optional
        If `True` the data array component is set to `None` instead of a
        list of numbers.

    Returns
    -------
    metrics : `MetricsExample`
        Example metrics with summary, output, and data components.
    """
    data_array = None if use_none else [563, 234, 456.7, 105, 2054, -1045]
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    return MetricsExample(summary, output, data_array)

52 

53 

class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
    # NOTE: the original had a redundant ``pass`` after the docstring;
    # the docstring alone is a sufficient class body.

59 

60 

class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    # Filesystem root for the datastore; subclasses that write to disk
    # set this in setUp so tearDown can remove it.
    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        # Provide a fresh dummy registry and datastore config per test.
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        # Remove any on-disk root created by the test; ignore_errors so a
        # partially-removed tree does not mask the real test result.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

85 

86 

class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    # Whether a put of an unsupported dataset type is expected to raise
    # DatasetTypeNotSupportedError; subclasses may override.
    hasUnsupportedPut = True

91 

    def testConfigRoot(self):
        """Check that setConfigRoot rewrites every configured root key."""
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        # rootKeys is None for datastores without a filesystem root.
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

100 

101 def testConstructor(self): 

102 datastore = self.makeDatastore() 

103 self.assertIsNotNone(datastore) 

104 self.assertIs(datastore.isEphemeral, self.isEphemeral) 

105 

    def testConfigurationValidation(self):
        """Check datastore configuration validation for storage classes
        and dataset refs."""
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        # "ThingTwo" is configured to be rejected where validation can fail.
        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        # Validation should also accept DatasetRef entities.
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

120 

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        # An unknown read parameter must be rejected.
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

134 

135 def testBasicPutGet(self): 

136 metrics = makeExampleMetrics() 

137 datastore = self.makeDatastore() 

138 

139 # Create multiple storage classes for testing different formulations 

140 storageClasses = [self.storageClassFactory.getStorageClass(sc) 

141 for sc in ("StructuredData", 

142 "StructuredDataJson", 

143 "StructuredDataPickle")] 

144 

145 dimensions = self.universe.extract(("visit", "physical_filter")) 

146 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"} 

147 

148 for sc in storageClasses: 

149 ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False) 

150 print("Using storageClass: {}".format(sc.name)) 

151 datastore.put(metrics, ref) 

152 

153 # Does it exist? 

154 self.assertTrue(datastore.exists(ref)) 

155 

156 # Get 

157 metricsOut = datastore.get(ref, parameters=None) 

158 self.assertEqual(metrics, metricsOut) 

159 

160 uri = datastore.getURI(ref) 

161 self.assertEqual(uri.scheme, self.uriScheme) 

162 

163 # Get a component -- we need to construct new refs for them 

164 # with derived storage classes but with parent ID 

165 for comp in ("data", "output"): 

166 compRef = ref.makeComponentRef(comp) 

167 output = datastore.get(compRef) 

168 self.assertEqual(output, getattr(metricsOut, comp)) 

169 

170 uri = datastore.getURI(compRef) 

171 self.assertEqual(uri.scheme, self.uriScheme) 

172 

173 storageClass = sc 

174 

175 # Check that we can put a metric with None in a component and 

176 # get it back as None 

177 metricsNone = makeExampleMetrics(use_none=True) 

178 dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"} 

179 refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False) 

180 datastore.put(metricsNone, refNone) 

181 

182 comp = "data" 

183 for comp in ("data", "output"): 

184 compRef = refNone.makeComponentRef(comp) 

185 output = datastore.get(compRef) 

186 self.assertEqual(output, getattr(metricsNone, comp)) 

187 

188 # Check that a put fails if the dataset type is not supported 

189 if self.hasUnsupportedPut: 

190 sc = StorageClass("UnsupportedSC", pytype=type(metrics)) 

191 ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId) 

192 with self.assertRaises(DatasetTypeNotSupportedError): 

193 datastore.put(metrics, ref) 

194 

195 # These should raise 

196 ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000) 

197 with self.assertRaises(FileNotFoundError): 

198 # non-existing file 

199 datastore.get(ref) 

200 

201 # Get a URI from it 

202 uri = datastore.getURI(ref, predict=True) 

203 self.assertEqual(uri.scheme, self.uriScheme) 

204 

205 with self.assertRaises(FileNotFoundError): 

206 datastore.getURI(ref) 

207 

208 def testTrustGetRequest(self): 

209 """Check that we can get datasets that registry knows nothing about. 

210 """ 

211 

212 datastore = self.makeDatastore() 

213 

214 # Skip test if the attribute is not defined 

215 if not hasattr(datastore, "trustGetRequest"): 

216 return 

217 

218 metrics = makeExampleMetrics() 

219 

220 i = 0 

221 for sc_name in ("StructuredData", "StructuredComposite"): 

222 i += 1 

223 datasetTypeName = f"metric{i}" 

224 

225 if sc_name == "StructuredComposite": 

226 disassembled = True 

227 else: 

228 disassembled = False 

229 

230 # Start datastore in default configuration of using registry 

231 datastore.trustGetRequest = False 

232 

233 # Create multiple storage classes for testing with or without 

234 # disassembly 

235 sc = self.storageClassFactory.getStorageClass(sc_name) 

236 dimensions = self.universe.extract(("visit", "physical_filter")) 

237 dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"} 

238 

239 ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False) 

240 datastore.put(metrics, ref) 

241 

242 # Does it exist? 

243 self.assertTrue(datastore.exists(ref)) 

244 

245 # Get 

246 metricsOut = datastore.get(ref) 

247 self.assertEqual(metrics, metricsOut) 

248 

249 # Get the URI(s) 

250 primaryURI, componentURIs = datastore.getURIs(ref) 

251 if disassembled: 

252 self.assertIsNone(primaryURI) 

253 self.assertEqual(len(componentURIs), 3) 

254 else: 

255 self.assertIn(datasetTypeName, primaryURI.path) 

256 self.assertFalse(componentURIs) 

257 

258 # Delete registry entry so now we are trusting 

259 datastore.removeStoredItemInfo(ref) 

260 

261 # Now stop trusting and check that things break 

262 datastore.trustGetRequest = False 

263 

264 # Does it exist? 

265 self.assertFalse(datastore.exists(ref)) 

266 

267 with self.assertRaises(FileNotFoundError): 

268 datastore.get(ref) 

269 

270 with self.assertRaises(FileNotFoundError): 

271 datastore.get(ref.makeComponentRef("data")) 

272 

273 # URI should fail unless we ask for prediction 

274 with self.assertRaises(FileNotFoundError): 

275 datastore.getURIs(ref) 

276 

277 predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True) 

278 if disassembled: 

279 self.assertIsNone(predicted_primary) 

280 self.assertEqual(len(predicted_disassembled), 3) 

281 for uri in predicted_disassembled.values(): 

282 self.assertEqual(uri.fragment, "predicted") 

283 self.assertIn(datasetTypeName, uri.path) 

284 else: 

285 self.assertIn(datasetTypeName, predicted_primary.path) 

286 self.assertFalse(predicted_disassembled) 

287 self.assertEqual(predicted_primary.fragment, "predicted") 

288 

289 # Now enable registry-free trusting mode 

290 datastore.trustGetRequest = True 

291 

292 # Try again to get it 

293 metricsOut = datastore.get(ref) 

294 self.assertEqual(metricsOut, metrics) 

295 

296 # Does it exist? 

297 self.assertTrue(datastore.exists(ref)) 

298 

299 # Get a component 

300 comp = "data" 

301 compRef = ref.makeComponentRef(comp) 

302 output = datastore.get(compRef) 

303 self.assertEqual(output, getattr(metrics, comp)) 

304 

305 # Get the URI -- if we trust this should work even without 

306 # enabling prediction. 

307 primaryURI2, componentURIs2 = datastore.getURIs(ref) 

308 self.assertEqual(primaryURI2, primaryURI) 

309 self.assertEqual(componentURIs2, componentURIs) 

310 

311 def testDisassembly(self): 

312 """Test disassembly within datastore.""" 

313 metrics = makeExampleMetrics() 

314 if self.isEphemeral: 

315 # in-memory datastore does not disassemble 

316 return 

317 

318 # Create multiple storage classes for testing different formulations 

319 # of composites. One of these will not disassemble to provide 

320 # a reference. 

321 storageClasses = [self.storageClassFactory.getStorageClass(sc) 

322 for sc in ("StructuredComposite", 

323 "StructuredCompositeTestA", 

324 "StructuredCompositeTestB", 

325 "StructuredCompositeReadComp", 

326 "StructuredData", # No disassembly 

327 "StructuredCompositeReadCompNoDisassembly", 

328 )] 

329 

330 # Create the test datastore 

331 datastore = self.makeDatastore() 

332 

333 # Dummy dataId 

334 dimensions = self.universe.extract(("visit", "physical_filter")) 

335 dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"} 

336 

337 for i, sc in enumerate(storageClasses): 

338 with self.subTest(storageClass=sc.name): 

339 # Create a different dataset type each time round 

340 # so that a test failure in this subtest does not trigger 

341 # a cascade of tests because of file clashes 

342 ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, 

343 conform=False) 

344 

345 disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"} 

346 

347 datastore.put(metrics, ref) 

348 

349 baseURI, compURIs = datastore.getURIs(ref) 

350 if disassembled: 

351 self.assertIsNone(baseURI) 

352 self.assertEqual(set(compURIs), {"data", "output", "summary"}) 

353 else: 

354 self.assertIsNotNone(baseURI) 

355 self.assertEqual(compURIs, {}) 

356 

357 metrics_get = datastore.get(ref) 

358 self.assertEqual(metrics_get, metrics) 

359 

360 # Retrieve the composite with read parameter 

361 stop = 4 

362 metrics_get = datastore.get(ref, parameters={"slice": slice(stop)}) 

363 self.assertEqual(metrics_get.summary, metrics.summary) 

364 self.assertEqual(metrics_get.output, metrics.output) 

365 self.assertEqual(metrics_get.data, metrics.data[:stop]) 

366 

367 # Retrieve a component 

368 data = datastore.get(ref.makeComponentRef("data")) 

369 self.assertEqual(data, metrics.data) 

370 

371 # On supported storage classes attempt to access a read 

372 # only component 

373 if "ReadComp" in sc.name: 

374 cRef = ref.makeComponentRef("counter") 

375 counter = datastore.get(cRef) 

376 self.assertEqual(counter, len(metrics.data)) 

377 

378 counter = datastore.get(cRef, parameters={"slice": slice(stop)}) 

379 self.assertEqual(counter, stop) 

380 

381 datastore.remove(ref) 

382 

    def testRegistryCompositePutGet(self):
        """Tests the case where registry disassembles and puts to datastore.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     )]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print("Using storageClass: {}".format(sc.name))
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId,
                                      conform=False)

            # Disassemble the composite externally, as a registry would.
            components = sc.delegate().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                # Each component gets its own ref with the component's
                # storage class but the parent data ID.
                compRef = self.makeDatasetRef(ref.datasetType.componentTypeName(compName), dimensions,
                                              components[compName].storageClass, dataId,
                                              conform=False)

                print("Writing component {} with {}".format(compName, compRef.datasetType.storageClass.name))
                datastore.put(compInfo.component, compRef)

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # We can generate identical files for each storage class
                # so remove the component here
                datastore.remove(compRef)

            # combine all the components we read back into a new composite
            metricsOut = sc.delegate().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)

429 

    def testRemove(self):
        """Check that a removed dataset is gone and cannot be removed twice."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 638, "physical_filter": "U"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.put(metrics, ref)

        # Does it exist?
        self.assertTrue(datastore.exists(ref))

        # Get
        metricsOut = datastore.get(ref)
        self.assertEqual(metrics, metricsOut)
        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

463 

464 def testTransfer(self): 

465 metrics = makeExampleMetrics() 

466 

467 dimensions = self.universe.extract(("visit", "physical_filter")) 

468 dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"} 

469 

470 sc = self.storageClassFactory.getStorageClass("StructuredData") 

471 ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False) 

472 

473 inputDatastore = self.makeDatastore("test_input_datastore") 

474 outputDatastore = self.makeDatastore("test_output_datastore") 

475 

476 inputDatastore.put(metrics, ref) 

477 outputDatastore.transfer(inputDatastore, ref) 

478 

479 metricsOut = outputDatastore.get(ref) 

480 self.assertEqual(metrics, metricsOut) 

481 

    def testBasicTransaction(self):
        """Puts inside a committed transaction persist; puts inside a
        transaction aborted by an exception are rolled back."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [(self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                 makeExampleMetrics(),)
                for dataId in dataIds]
        # First half committed, second half rolled back.
        succeed = data[:nDatasets//2]
        fail = data[nDatasets//2:]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

521 

    def testNestedTransaction(self):
        """A rollback of an outer transaction also undoes puts made inside
        a nested inner transaction, but not puts made before either."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                        conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                               conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                                   conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

557 

558 def _prepareIngestTest(self): 

559 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

560 dimensions = self.universe.extract(("visit", "physical_filter")) 

561 metrics = makeExampleMetrics() 

562 dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"} 

563 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False) 

564 return metrics, ref 

565 

566 def runIngestTest(self, func, expectOutput=True): 

567 metrics, ref = self._prepareIngestTest() 

568 # The file will be deleted after the test. 

569 # For symlink tests this leads to a situation where the datastore 

570 # points to a file that does not exist. This will make os.path.exist 

571 # return False but then the new symlink will fail with 

572 # FileExistsError later in the code so the test still passes. 

573 with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path: 

574 with open(path, 'w') as fd: 

575 yaml.dump(metrics._asdict(), stream=fd) 

576 func(metrics, path, ref) 

577 

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer.
        """
        for mode in (None, "auto"):

            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(FileDataset(path="this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    # Datastores that do not support this transfer mode
                    # must raise NotImplementedError.
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

627 

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them.
        """
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(FileDataset(path="../this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failOutputExists(obj, path, ref):
                    """Can't ingest files if transfer destination already
                    exists."""
                    with self.assertRaises(FileExistsError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    # Unsupported transfer modes must raise NotImplementedError.
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    # "move" consumes the input file, so no output expected.
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                    self.runIngestTest(failOutputExists)
                else:
                    self.runIngestTest(failNotImplemented)

667 

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, 'w') as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    # The stored symlink must point at the original file,
                    # not the intermediate symlink.
                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Cleanup the file for next time round loop
                    # since it will get the same file name in store
                    datastore.remove(ref)

705 

706 

class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Expected URI scheme for datasets in this datastore.
    uriScheme = "file"
    # POSIX datastore can ingest in place with "auto" mode.
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    # Config keys rewritten by setConfigRoot.
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

721 

722 

class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""
    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                  conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove put back but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)

751 

752 

class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Check that a POSIX datastore cleans up after failed formatter writes."""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write does cleanup a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml",
                         f"Is there a file extension in {expectedUri}")

        # Try formatter that fails and formatter that fails and leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter,
                                                             overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory; renamed from ``dir``
                # so the builtin is not shadowed.
                parentDir = expectedUri.dirname()
                self.assertTrue(parentDir.exists(),
                                f"Check for existence of directory {parentDir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter,
                                                     overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")

809 

810 

class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""
    # NOTE: original docstring said "PosixDatastore specialization" — a
    # copy-paste error; this case exercises the in-memory datastore.
    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    # In-memory datastore accepts any dataset type.
    hasUnsupportedPut = False
    # No file-based ingest supported.
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False

820 

821 

class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """Run the datastore tests against a ChainedDatastore whose children
    are POSIX datastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    # Config keys locating each child datastore's root directory.
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True

831 

832 

class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """Run the datastore tests against a ChainedDatastore built entirely
    from InMemoryDatastore children."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False

837 

838 

class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model.

        Assumes that each test class has the same constraints: ``put`` and
        ``ingest`` must be accepted or rejected (`DatasetTypeNotSupportedError`)
        purely according to dataset type name and storage class.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).  Context managers guarantee the temporary files are
        # closed (and therefore deleted) even if an assertion fails.
        with tempfile.NamedTemporaryFile(suffix=".yaml") as testfile_y, \
                tempfile.NamedTemporaryFile(suffix=".json") as testfile_j:
            for datasetTypeName, sc, accepted in (("metric", sc1, True), ("metric2", sc1, False),
                                                  ("metric33", sc1, True), ("metric2", sc2, True)):
                # Choose different temp file depending on StorageClass
                testfile = testfile_j if sc.name.endswith("Json") else testfile_y

                with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name,
                                  file=testfile.name):
                    ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                    if accepted:
                        datastore.put(metrics, ref)
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)

                        # Try ingest
                        if self.canIngest:
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                            self.assertTrue(datastore.exists(ref))
                            datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.put(metrics, ref)
                        self.assertFalse(datastore.exists(ref))

                        # Again with ingest
                        if self.canIngest:
                            with self.assertRaises(DatasetTypeNotSupportedError):
                                datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                            self.assertFalse(datastore.exists(ref))

883 

884 

class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """Constraints tests run against a PosixDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # A fresh scratch directory must exist before the base class
        # configures the datastore root, so create it first.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

894 

895 

class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """Constraints tests run against an InMemoryDatastore, which does not
    support file ingest."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False

900 

901 

class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore."""
    # Fixed docstring typo: "ChainedDatstore" -> "ChainedDatastore".
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")

906 

907 

class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """Constraints tests for a ChainedDatastore backed by a POSIXDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")

911 

912 

class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """Constraints tests for a ChainedDatastore built entirely from
    InMemoryDatastore children."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False

917 

918 

class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if child datastore would accept."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model.

        For each dataset type / data ID / storage class combination the
        ``accept`` tuple gives the expected acceptance for each child
        datastore in turn, and ``ingest`` says whether the chain as a whole
        should accept a file ingest.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).  Context managers guarantee the temporary files are
        # closed (and therefore deleted) even if an assertion fails.
        with tempfile.NamedTemporaryFile(suffix=".yaml") as testfile_y, \
                tempfile.NamedTemporaryFile(suffix=".json") as testfile_j:
            for typeName, dataId, sc, accept, ingest in (("metric", dataId1, sc1, (False, True, False), True),
                                                         ("metric2", dataId1, sc1, (False, False, False), False),
                                                         ("metric2", dataId2, sc1, (True, False, False), False),
                                                         ("metric33", dataId2, sc2, (True, True, False), True),
                                                         ("metric2", dataId1, sc2, (False, True, False), True)):

                # Choose different temp file depending on StorageClass
                testfile = testfile_j if sc.name.endswith("Json") else testfile_y

                with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                    ref = self.makeDatasetRef(typeName, dimensions, sc, dataId,
                                              conform=False)
                    if any(accept):
                        datastore.put(metrics, ref)
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            self.assertEqual(childDatastore.exists(ref), expected,
                                             f"Testing presence of {ref} in datastore {childDatastore.name}")

                        datastore.remove(ref)

                        # Check that ingest works
                        if ingest:
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                            self.assertTrue(datastore.exists(ref))

                            # Check each datastore inside the chained datastore
                            for childDatastore, expected in zip(datastore.datastores, accept):
                                # Ephemeral datastores means InMemory at the moment
                                # and that does not accept ingest of files.
                                if childDatastore.isEphemeral:
                                    expected = False
                                self.assertEqual(childDatastore.exists(ref), expected,
                                                 f"Testing presence of ingested {ref} in datastore"
                                                 f" {childDatastore.name}")

                            datastore.remove(ref)
                        else:
                            with self.assertRaises(DatasetTypeNotSupportedError):
                                datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.put(metrics, ref)
                        self.assertFalse(datastore.exists(ref))

                        # Again with ingest
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))

997 

998 

# Allow the test module to be run directly as a script.
if __name__ == "__main__":
    unittest.main()