
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import os
import unittest
import shutil
import yaml
import tempfile
import lsst.utils.tests

from lsst.utils import doImport

from lsst.daf.butler import StorageClassFactory, StorageClass, DimensionUniverse, FileDataset
from lsst.daf.butler import DatastoreConfig, DatasetTypeNotSupportedError, DatastoreValidationError
from lsst.daf.butler.formatters.yaml import YamlFormatter

from lsst.daf.butler.tests import (DatasetTestHelper, DatastoreTestHelper, BadWriteFormatter,
                                   BadNoWriteFormatter, MetricsExample, DummyRegistry)


# Directory containing this test file; used to locate the configuration
# files that drive these tests.
TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          array,
                          )
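# Note: the tests below read the three sections of a MetricsExample back as
# its "summary", "output" and "data" components; the list passed as the third
# argument is the "data" component, which supports the "slice" read parameter
# used in the disassembly tests.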


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing."""
    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

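# Concrete test cases configure these shared tests through class attributes:
# each subclass points configFile at a datastore configuration and declares
# the behavior it expects via attributes such as uriScheme,
# ingestTransferModes, isEphemeral, rootKeys and validationCanFail (see
# PosixDatastoreTestCase and friends below).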

class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated."""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        # An unknown read parameter should be rejected
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredData",
                                     "StructuredDataJson",
                                     "StructuredDataPickle")]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            print("Using storageClass: {}".format(sc.name))
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

        # Remember the last storage class used; it is needed again below
        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

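    # The next test exercises "trusted" gets: a datastore that supports
    # trustGetRequest will, when it is enabled, look for artifacts itself
    # (predicting their location from the dataset ref) even after its internal
    # record of the dataset has been deleted, instead of raising
    # FileNotFoundError immediately.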

    def testTrustGetRequest(self):
        """Check that we can get datasets that the registry knows nothing
        about.
        """

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(("StructuredData", "StructuredComposite"), start=1):
            datasetTypeName = f"metric{i}"

            # Only the composite storage class is disassembled
            disassembled = sc_name == "StructuredComposite"

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create the storage class for this iteration (with or without
            # disassembly)
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))
            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the internal registry record; from now on only trust
            # mode can find the dataset
            datastore.removeStoredItemInfo(ref)

            # While not trusting, check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)


    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     "StructuredCompositeReadComp",
                                     "StructuredData",  # No disassembly
                                     "StructuredCompositeReadCompNoDisassembly",
                                     )]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round so that a
                # failure in this subtest does not trigger a cascade of
                # failures from file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId,
                                          conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)


    def testRegistryCompositePutGet(self):
        """Test the case where the registry disassembles and puts to the
        datastore.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     )]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print("Using storageClass: {}".format(sc.name))
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId,
                                      conform=False)

            components = sc.delegate().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                compRef = self.makeDatasetRef(ref.datasetType.componentTypeName(compName), dimensions,
                                              components[compName].storageClass, dataId,
                                              conform=False)

                print("Writing component {} with {}".format(compName, compRef.datasetType.storageClass.name))
                datastore.put(compInfo.component, compRef)

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # We can generate identical files for each storage class
                # so remove the component here
                datastore.remove(compRef)

            # combine all the components we read back into a new composite
            metricsOut = sc.delegate().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)


    def prepDeleteTest(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 638, "physical_filter": "U"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.put(metrics, ref)

        # Does it exist?
        self.assertTrue(datastore.exists(ref))

        # Get
        metricsOut = datastore.get(ref)
        self.assertEqual(metrics, metricsOut)

        return datastore, ref

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)


    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())


    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)


    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [(self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                 makeExampleMetrics(),)
                for dataId in dataIds]
        succeed = data[:nDatasets//2]
        fail = data[nDatasets//2:]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)


    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                        conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                               conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                                   conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)


    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, 'w') as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

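    # A note on the ingest "transfer" modes exercised below: None ingests a
    # file already inside the datastore root in place; "copy", "move", "link",
    # "hardlink", "symlink" and "relsymlink" transfer the file into the
    # datastore first; "auto" (presumably) lets the datastore pick a suitable
    # mechanism, which is why some datastores accept "auto" but still cannot
    # ingest in place.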

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer.
        """
        for mode in (None, "auto"):

            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first copy it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(FileDataset(path="this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)


    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them.
        """
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(FileDataset(path="../this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failOutputExists(obj, path, ref):
                    """Can't ingest files if transfer destination already
                    exists."""
                    with self.assertRaises(FileExistsError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                    self.runIngestTest(failOutputExists)
                else:
                    self.runIngestTest(failNotImplemented)


    def testIngestSymlinkOfSymlink(self):
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, 'w') as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop
                    # since it will get the same file name in the store
                    datastore.remove(ref)



class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""
    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                  conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove the dataset and put it back with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml",
                         f"Is there a file extension in {expectedUri}")

        # Try formatters that fail in different ways: one failing outright
        # and one leaving a partial file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter,
                                                             overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that the parent directory is still there
                dirUri = expectedUri.dirname()
                self.assertTrue(dirUri.exists(),
                                f"Check for existence of directory {dirUri}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter,
                                                     overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        # Whether a put/ingest is accepted depends on the dataset type name
        # and storage class allowed by the constraints configuration
        for datasetTypeName, sc, accepted in (("metric", sc1, True), ("metric2", sc1, False),
                                              ("metric33", sc1, True), ("metric2", sc2, True)):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore and constraints
    at the ChainedDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept a dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

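    # In the test below, "accept" is a tuple of booleans, one per child
    # datastore in the chain, giving which children are expected to end up
    # holding the dataset; "ingest" says whether the chain as a whole is
    # expected to accept an ingest of the corresponding file.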

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (("metric", dataId1, sc1, (False, True, False), True),
                                                     ("metric2", dataId1, sc1, (False, False, False), False),
                                                     ("metric2", dataId2, sc1, (True, False, False), False),
                                                     ("metric33", dataId2, sc2, (True, True, False), True),
                                                     ("metric2", dataId1, sc2, (False, True, False), True)):

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId,
                                          conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(childDatastore.exists(ref), expected,
                                         f"Testing presence of {ref} in datastore {childDatastore.name}")

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores (currently InMemory) do
                            # not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(childDatastore.exists(ref), expected,
                                             f"Testing presence of ingested {ref} in datastore"
                                             f" {childDatastore.name}")

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


if __name__ == "__main__":
    unittest.main()
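# Run directly with Python, or via any unittest-compatible runner
# (e.g. pytest), to execute the full suite.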