# Coverage report header: tests/test_datastore.py — 15% of 963 statements
# (coverage.py v6.4.1, created at 2022-06-28 09:25 +0000).

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock  # Needed explicitly for unittest.mock.patch.object below.
from collections import UserDict
from dataclasses import dataclass

# Imported explicitly so that FileDatastore can be patched by its full
# module path in testCanNotDeterminePutFormatterLocation below.
import lsst.daf.butler.datastores.fileDatastore
import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DatasetRefURIs,
    DatasetTypeNotSupportedError,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
)
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


62 

class DataIdForTest(UserDict):
    """A dict-like class that can be used for a DataId dict that is hashable.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before version 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
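
# A minimal usage sketch of DataIdForTest (illustrative only; not used by the
# tests below). Mutation must be bracketed by toggling ``frozen``, and the
# hash tracks the current contents:
#
#     data_id = DataIdForTest({"instrument": "dummy", "visit": 52})
#     key = hash(data_id)      # usable as a dict key while frozen
#     data_id.frozen = False   # thaw before mutating
#     data_id["visit"] = 53    # would raise RuntimeError if still frozen
#     data_id.frozen = True
#     assert hash(data_id) != key
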

def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
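
# A minimal sketch of how FakeDataCoordinate is used (illustrative only):
# keys are wrapped in Named so that the NamedKeyDict machinery accepts them,
# and freeze() makes the result safely hashable:
#
#     data_id = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638})
#     lookup = {data_id: "value"}  # hashable, so usable as a dict key
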

class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing."""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage classes are fixed for all datastores in these tests.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself).
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated."""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with the parent ID.
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None.
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported.
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise.
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # Non-existent file.
            datastore.get(ref)

        # Get a URI from it.
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that the registry knows nothing
        about.
        """
        datastore = self.makeDatastore()

        # Skip the test if the attribute is not defined.
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"metric{i}"

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start the datastore in the default configuration of using
            # the registry.
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly.
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry, so from now on the dataset can only
            # be found by trusting.
            datastore.removeStoredItemInfo(ref)

            # With trust disabled, check that things break.
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction.
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode.
            datastore.trustGetRequest = True

            # Try again to get it.
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component.
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust, this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within the datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # The in-memory datastore does not disassemble.
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble, to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore.
        datastore = self.makeDatastore()

        # Dummy dataId.
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round so that a
                # test failure in this subtest does not trigger a cascade of
                # failures caused by file clashes.
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with a read parameter.
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component.
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component.
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def testRegistryCompositePutGet(self):
        """Test the case where the registry disassembles and puts to the
        datastore.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
            )
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print(f"Using storageClass: {sc.name}")
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

            components = sc.delegate().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                compRef = self.makeDatasetRef(
                    ref.datasetType.componentTypeName(compName),
                    dimensions,
                    components[compName].storageClass,
                    dataId,
                    conform=False,
                )

                print(f"Writing component {compName} with {compRef.datasetType.storageClass.name}")
                datastore.put(compInfo.component, compRef)

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # We can generate identical files for each storage class,
                # so remove the component here.
                datastore.remove(compRef)

            # Combine all the components we read back into a new composite.
            metricsOut = sc.delegate().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
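
    # prepDeleteTest returns the datastore followed by the refs it created,
    # so callers unpack it directly, as in testRemove and testTrash below:
    #
    #     datastore, ref = self.prepDeleteTest()
    #     datastore, *refs = self.prepDeleteTest(n_refs=10)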

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail.
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once.
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail.
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op.
        datastore.forget([ref])

        # The predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist.
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not.
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist.
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist.
        for ref, _ in fail:
            # These should raise.
            with self.assertRaises(FileNotFoundError):
                # Non-existent file.
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist.
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist.
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone.
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False, but then the new symlink will fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):

            # Some datastores have "auto" but can't do in-place transfer.
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # First move it into the root, and adjust the path
                    # accordingly.
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    the mode is "auto"."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode.
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of
                    # mode.
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store.
                    datastore.remove(ref)

    def testExportImportRecords(self):
        """Test the export_records and import_records methods."""
        datastore = self.makeDatastore("test_datastore")

        # For now only a FileDatastore can be used for this test.
        # A ChainedDatastore that only includes InMemoryDatastores has to be
        # skipped as well.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)

        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete
            # set.
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

        # Use the same datastore name to import the relative paths.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location.
        """
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations.
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # Verify the non-composite ref execution path.
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # Verify the composite-ref execution path.
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation.
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly.
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash tests to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet.
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record, after which trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed.


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension).
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails but leaves
        # a file behind.
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey-patch the formatter.
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset; it should fail.
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk.
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory.
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written.
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of datastores."""

    def testConstraints(self):
        """Test the constraints model. Assumes that each test class has the
        same constraints.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric2", sc1, False),
            ("metric33", sc1, True),
            ("metric2", sc2, True),
        ):
            # Choose a different temp file depending on StorageClass.
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest.
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest.
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore, with
    constraints at the ChainedDatastore level.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test the chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
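
        # Each ``accept`` tuple below gives the expected presence of the
        # dataset in each child datastore of the chain, in configured order
        # (see the zip with datastore.datastores below); ``ingest`` says
        # whether the chain as a whole should accept an ingest of the ref.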

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric2", dataId1, sc1, (False, False, False), False),
            ("metric2", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric2", dataId1, sc2, (False, True, False), True),
        ):
            # Choose a different temp file depending on StorageClass.
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore.
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works.
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore.
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores means InMemory at the
                            # moment and that does not accept ingest of
                            # files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest.
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for the datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files.
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create a list of refs and a list of temporary files.
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create the test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check that we don't have a cache directory.
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary.
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check that we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary.
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # The cached file should no longer exist, but the uncached file
        # should be unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file, and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in the cache.
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """
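
    # For reference, _expiration_config("files", 2) renders to YAML of this
    # shape (everything cacheable by default, with expiry controlled by the
    # mode and threshold values):
    #
    #     cached:
    #       default: true
    #       expiry:
    #         mode: files
    #         threshold: 2
    #       cacheable:
    #         unused: true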

1456 def testCacheExpiryFiles(self): 

1457 threshold = 2 # Keep at least 2 files. 

1458 mode = "files" 

1459 config_str = self._expiration_config(mode, threshold) 

1460 

1461 cache_manager = self._make_cache_manager(config_str) 

1462 

1463 # Check that an empty cache returns unknown for arbitrary ref 

1464 self.assertFalse(cache_manager.known_to_cache(self.refs[0])) 

1465 

1466 # Should end with datasets: 2, 3, 4 

1467 self.assertExpiration(cache_manager, 5, threshold + 1) 

1468 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1469 

1470 # Check that we will not expire a file that is actively in use. 

1471 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1472 self.assertIsNotNone(found) 

1473 

1474 # Trigger cache expiration that should remove the file 

1475 # we just retrieved. Should now have: 3, 4, 5 

1476 cached = cache_manager.move_to_cache(self.files[5], self.refs[5]) 

1477 self.assertIsNotNone(cached) 

1478 

1479 # Cache should still report the standard file count. 

1480 self.assertEqual(cache_manager.file_count, threshold + 1) 

1481 

1482 # Add additional entry to cache. 

1483 # Should now have 4, 5, 6 

1484 cached = cache_manager.move_to_cache(self.files[6], self.refs[6]) 

1485 self.assertIsNotNone(cached) 

1486 

1487 # Is the file still there? 

1488 self.assertTrue(found.exists()) 

1489 

1490 # Can we read it? 

1491 data = found.read() 

1492 self.assertGreater(len(data), 0) 

1493 

1494 # Outside the context manager the file should no longer exist. 

1495 self.assertFalse(found.exists()) 

1496 

1497 # File count should not have changed. 

1498 self.assertEqual(cache_manager.file_count, threshold + 1) 

1499 

1500 # Dataset 2 was in the exempt directory but because hardlinks 

1501 # are used it was deleted from the main cache during cache expiry 

1502 # above and so should no longer be found. 

1503 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1504 self.assertIsNone(found) 

1505 

1506 # And the one stored after it is also gone. 

1507 with cache_manager.find_in_cache(self.refs[3], ".txt") as found: 

1508 self.assertIsNone(found) 

1509 

1510 # But dataset 4 is present. 

1511 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1512 self.assertIsNotNone(found) 

1513 

1514 # Adding a new dataset to the cache should now expire dataset 4. 

1515 cache_manager.move_to_cache(self.files[7], self.refs[7]) 

1516 

1517 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1518 self.assertIsNone(found) 

1519 

1520 def testCacheExpiryDatasets(self): 

1521 threshold = 2 # Keep at least 2 datasets. 

1522 mode = "datasets" 

1523 config_str = self._expiration_config(mode, threshold) 

1524 

1525 cache_manager = self._make_cache_manager(config_str) 

1526 self.assertExpiration(cache_manager, 5, threshold + 1) 

1527 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1528 

1529 def testCacheExpiryDatasetsComposite(self): 

1530 threshold = 2 # Keep at least 2 datasets. 

1531 mode = "datasets" 

1532 config_str = self._expiration_config(mode, threshold) 

1533 

1534 cache_manager = self._make_cache_manager(config_str) 

1535 

1536 n_datasets = 3 

1537 for i in range(n_datasets): 

1538 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]): 

1539 cached = cache_manager.move_to_cache(component_file, component_ref) 

1540 self.assertIsNotNone(cached) 

1541 self.assertTrue(cache_manager.known_to_cache(component_ref)) 

1542 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef())) 

1543 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension())) 

1544 

1545 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files 

1546 

1547 # Write two new non-composite datasets; the number of files should drop. 

1548 self.assertExpiration(cache_manager, 2, 5) 

1549 

1550 def testCacheExpirySize(self): 

1551 threshold = 55 # Each file is 10 bytes 

1552 mode = "size" 

1553 config_str = self._expiration_config(mode, threshold) 

1554 

1555 cache_manager = self._make_cache_manager(config_str) 

1556 self.assertExpiration(cache_manager, 10, 6) 

1557 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1558 

1559 def assertExpiration(self, cache_manager, n_datasets, n_retained): 

1560 """Insert the datasets and then check the number retained.""" 

1561 for i in range(n_datasets): 

1562 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1563 self.assertIsNotNone(cached) 

1564 

1565 self.assertEqual(cache_manager.file_count, n_retained) 

1566 

1567 # The oldest files should no longer be in the cache. 

1568 for i in range(n_datasets): 

1569 with cache_manager.find_in_cache(self.refs[i], ".txt") as found: 

1570 if i >= n_datasets - n_retained: 

1571 self.assertIsInstance(found, ResourcePath) 

1572 else: 

1573 self.assertIsNone(found) 

1574 
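# A minimal sketch of the retention arithmetic assertExpiration verifies
# (illustrative, not part of the suite): indices satisfying
# i >= n_datasets - n_retained survive.
#
#     n_datasets, n_retained = 5, 3
#     survivors = [i for i in range(n_datasets) if i >= n_datasets - n_retained]
#     assert survivors == [2, 3, 4]  # the "datasets: 2, 3, 4" noted above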

1575 def testCacheExpiryAge(self): 

1576 threshold = 1 # Expire files older than 1 second. 

1577 mode = "age" 

1578 config_str = self._expiration_config(mode, threshold) 

1579 

1580 cache_manager = self._make_cache_manager(config_str) 

1581 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1582 

1583 # Insert 2 files, then sleep, then insert 4 more. 

1584 for i in range(2): 

1585 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1586 self.assertIsNotNone(cached) 

1587 time.sleep(2.0) 

1588 for j in range(4): 

1589 i = 2 + j # Continue counting from the first batch. 

1590 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1591 self.assertIsNotNone(cached) 

1592 

1593 # Only the files written after the sleep should exist. 

1594 self.assertEqual(cache_manager.file_count, 4) 

1595 with cache_manager.find_in_cache(self.refs[1], ".txt") as found: 

1596 self.assertIsNone(found) 

1597 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1598 self.assertIsInstance(found, ResourcePath) 

1599 

1600 
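# Taken together, the four tests above cover every expiry mode used in the
# expiry config: "files" (maximum file count), "datasets" (maximum dataset
# count), "size" (maximum total bytes) and "age" (maximum age in seconds).
# A sketch of building a manager directly from such a config string (a
# guess at the plumbing hidden inside _make_cache_manager; the constructor
# signature is assumed to mirror DatastoreDisabledCacheManager above):
#
#     config = DatastoreCacheManagerConfig(yaml.safe_load(config_str))
#     cache_manager = DatastoreCacheManager(config, universe=self.universe)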

1601class DatasetRefURIsTestCase(unittest.TestCase): 

1602 """Tests for DatasetRefURIs.""" 

1603 

1604 def testSequenceAccess(self): 

1605 """Verify that DatasetRefURIs can be treated like a two-item tuple.""" 

1606 uris = DatasetRefURIs() 

1607 

1608 self.assertEqual(len(uris), 2) 

1609 self.assertIsNone(uris[0]) 

1610 self.assertEqual(uris[1], {}) 

1611 

1612 primaryURI = ResourcePath("1/2/3") 

1613 componentURI = ResourcePath("a/b/c") 

1614 

1615 # Affirm that DatasetRefURIs does not support MutableSequence item assignment. 

1616 with self.assertRaises(TypeError): 

1617 uris[0] = primaryURI 

1618 with self.assertRaises(TypeError): 

1619 uris[1] = {"foo": componentURI} 

1620 

1621 # But the URIs can be assigned via the named properties: 

1622 uris.primaryURI = primaryURI 

1623 uris.componentURIs = {"foo": componentURI} 

1624 self.assertEqual(uris.primaryURI, primaryURI) 

1625 self.assertEqual(uris[0], primaryURI) 

1626 

1627 primary, components = uris 

1628 self.assertEqual(primary, primaryURI) 

1629 self.assertEqual(components, {"foo": componentURI}) 

1630 
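# The tuple-like access verified above exists so that callers written
# against an older (primary, components) tuple return type keep working
# unchanged. A short sketch using only what the test itself constructs:
#
#     uris = DatasetRefURIs(primaryURI, {"foo": componentURI})
#     primary, components = uris  # two-item unpacking still works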

1631 def testRepr(self): 

1632 """Verify __repr__ output.""" 

1633 uris = DatasetRefURIs(ResourcePath("1/2/3"), {"comp": ResourcePath("a/b/c")}) 

1634 self.assertEqual( 

1635 repr(uris), 

1636 f'DatasetRefURIs(ResourcePath("{os.getcwd()}/1/2/3"), ' 

1637 "{'comp': ResourcePath(\"" + os.getcwd() + '/a/b/c")})', 

1638 ) 

1639 
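# Note that the expected repr embeds os.getcwd(): ResourcePath absolutizes
# a relative path against the current working directory, so the test must
# build its expected string the same way.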

1640 

1641class DataIdForTestTestCase(unittest.TestCase): 

1642 """Tests for the DataIdForTest class.""" 

1643 

1644 def testImmutable(self): 

1645 """Verify that an instance is immutable by default.""" 

1646 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"}) 

1647 initial_hash = hash(dataId) 

1648 

1649 with self.assertRaises(RuntimeError): 

1650 dataId["instrument"] = "foo" 

1651 

1652 with self.assertRaises(RuntimeError): 

1653 del dataId["instrument"] 

1654 

1655 assert sys.version_info[0] == 3 

1656 if sys.version_info[1] >= 9: 

1657 with self.assertRaises(RuntimeError): 

1658 dataId |= dict(foo="bar") 

1659 

1660 with self.assertRaises(RuntimeError): 

1661 dataId.pop("instrument") 

1662 

1663 with self.assertRaises(RuntimeError): 

1664 dataId.popitem() 

1665 

1666 with self.assertRaises(RuntimeError): 

1667 dataId.update(dict(instrument="foo")) 

1668 

1669 # Verify that the hash value has not changed. 

1670 self.assertEqual(initial_hash, hash(dataId)) 

1671 
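# A minimal usage sketch of the freeze/unfreeze cycle that testMutable
# below walks through assertion by assertion (values illustrative):
#
#     dataId = DataIdForTest({"instrument": "dummy", "visit": 52})
#     dataId.frozen = False   # unfreeze; subsequent hashes will differ
#     dataId["visit"] = 53
#     dataId.frozen = True    # refreeze before using it as a dict key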

1672 def testMutable(self): 

1673 """Verify that an instance can be made mutable (unfrozen).""" 

1674 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"}) 

1675 initial_hash = hash(dataId) 

1676 dataId.frozen = False 

1677 self.assertEqual(initial_hash, hash(dataId)) 

1678 

1679 dataId["instrument"] = "foo" 

1680 self.assertEqual(dataId["instrument"], "foo") 

1681 self.assertNotEqual(initial_hash, hash(dataId)) 

1682 initial_hash = hash(dataId) 

1683 

1684 del dataId["instrument"] 

1685 self.assertNotIn("instrument", dataId) 

1686 self.assertNotEqual(initial_hash, hash(dataId)) 

1687 initial_hash = hash(dataId) 

1688 

1689 assert sys.version_info[0] == 3 

1690 if sys.version_info[1] >= 9: 

1691 dataId |= dict(foo="bar") 

1692 self.assertEqual(dataId["foo"], "bar") 

1693 self.assertNotEqual(initial_hash, hash(dataId)) 

1694 initial_hash = hash(dataId) 

1695 

1696 dataId.pop("visit") 

1697 self.assertNotIn("visit", dataId) 

1698 self.assertNotEqual(initial_hash, hash(dataId)) 

1699 initial_hash = hash(dataId) 

1700 

1701 dataId.popitem() 

1702 self.assertNotIn("physical_filter", dataId) 

1703 self.assertNotEqual(initial_hash, hash(dataId)) 

1704 initial_hash = hash(dataId) 

1705 

1706 dataId.update(dict(instrument="foo")) 

1707 self.assertEqual(dataId["instrument"], "foo") 

1708 self.assertNotEqual(initial_hash, hash(dataId)) 

1709 initial_hash = hash(dataId) 

1710 

1711 

1712if __name__ == "__main__":  # coverage: 1712 ↛ 1713 (didn't jump to line 1713 because the condition was never true) 

1713 unittest.main()