# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock  # Needed for unittest.mock.patch.object below.
from collections import UserDict
from dataclasses import dataclass

import lsst.daf.butler.datastores.fileDatastore  # Patch target used below.
import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DatasetRefURIs,
    DatasetTypeNotSupportedError,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
)
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)

class DataIdForTest(UserDict):
    """A hashable dict-like class that can be used as a DataId in tests.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        if sys.version_info < (3, 9):
            raise NotImplementedError("operator |= (ior) is not supported before version 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
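# Illustrative usage sketch (not exercised by the test runner): the class
# freezes itself at the end of construction, so instances behave like
# immutable, hashable data IDs.
#
#     dataId = DataIdForTest({"instrument": "dummy", "visit": 52})
#     hash(dataId)           # stable while frozen
#     dataId["visit"] = 53   # raises RuntimeError("DataIdForTest is frozen.")
#     dataId.frozen = False  # explicit opt-out; mutation changes the hash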

def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )

@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
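# Illustrative usage sketch (not exercised by the test runner): each plain
# string key is wrapped in a Named dataclass so that NamedKeyDict accepts it,
# and the mapping is frozen before being handed to makeDatasetRef, e.g.
#
#     dataId = FakeDataCoordinate.from_dict(
#         {"instrument": "dummy", "visit": 638, "physical_filter": "U"}
#     )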

class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass

class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing."""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
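# Concrete subclasses configure behaviour through class attributes rather
# than constructor arguments: for example configFile, uriScheme,
# ingestTransferModes, isEphemeral, rootKeys and validationCanFail, as set
# by PosixDatastoreTestCase and the other test cases below.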

class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated."""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing about."""
        datastore = self.makeDatastore()

        # Skip test if the datastore does not support trust mode.
        if not hasattr(datastore, "trustGetRequest"):
            self.skipTest("datastore does not support trustGetRequest")

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(("StructuredData", "StructuredComposite"), start=1):
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            disassembled = sc_name == "StructuredComposite"

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry so that only trust mode can find it
            datastore.removeStoredItemInfo(ref)

            # With trust still disabled, everything should now break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of tests because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def testRegistryCompositePutGet(self):
        """Tests the case where registry disassembles and puts to datastore."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
            )
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print(f"Using storageClass: {sc.name}")
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

            components = sc.delegate().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                compRef = self.makeDatasetRef(
                    ref.datasetType.componentTypeName(compName),
                    dimensions,
                    components[compName].storageClass,
                    dataId,
                    conform=False,
                )

                print(f"Writing component {compName} with {compRef.datasetType.storageClass.name}")
                datastore.put(compInfo.component, compRef)

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # We can generate identical files for each storage class
                # so remove the component here
                datastore.remove(compRef)

            # combine all the components we read back into a new composite
            metricsOut = sc.delegate().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
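    # Callers unpack the return value of prepDeleteTest, e.g.
    # ``datastore, ref = self.prepDeleteTest()`` for a single dataset or
    # ``datastore, *refs = self.prepDeleteTest(n_refs=10)`` for several.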

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)
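    # The transaction contract exercised by the two tests above, in
    # miniature (illustrative):
    #
    #     with datastore.transaction():
    #         datastore.put(obj, ref)  # undone if the block raises
    #
    # An exception escaping the context manager rolls back every put made
    # inside it, including those made in nested transactions, while puts
    # completed before the transaction are untouched.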

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(
                                FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode
                            )
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)
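    # Both ingest tests drive the same entry point (illustrative):
    #
    #     datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
    #
    # where ``transfer`` selects the mechanism: None (in place), "copy",
    # "move", "link", "hardlink", "symlink", "relsymlink" or "auto".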

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop
                    # since it will get the same file name in store
                    datastore.remove(ref)

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""
        datastore = self.makeDatastore("test_datastore")

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)

        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self):
        """Test that we write to a temporary file and then rename it."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location."""
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)

class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove the dataset, then put it back with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)

class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record and trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed

class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write cleans up any partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails and a formatter that fails and leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")

class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False

class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False

class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))

class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False

class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept them."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores means InMemory at the moment
                            # and that does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))

class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
"""
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use a default caching status of true and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
"""
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
"""
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
"""
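    # For example (illustrative), _expiration_config("files", 2) renders to:
    #
    #     cached:
    #       default: true
    #       expiry:
    #         mode: files
    #         threshold: 2
    #       cacheable:
    #         unused: true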

    def testCacheExpiryFiles(self):
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        # Check that an empty cache returns unknown for an arbitrary ref.
        self.assertFalse(cache_manager.known_to_cache(self.refs[0]))

        # Should end with datasets: 2, 3, 4
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add an additional entry to the cache.
            # Should now have: 4, 5, 6
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside the context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory, but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now delete it.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)
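
    # Background for the hardlink comments in testCacheExpiryFiles, as an
    # OS-level sketch independent of the cache manager: a file's contents
    # survive removal of one path while another hard link remains, which is
    # why an actively borrowed file stays readable after expiry deletes its
    # main cache entry, yet a fresh cache lookup no longer finds it.
    @staticmethod
    def _example_hardlink_survival():
        with tempfile.TemporaryDirectory() as tmpdir:
            cache_path = os.path.join(tmpdir, "cached.txt")
            exempt_path = os.path.join(tmpdir, "exempt.txt")
            with open(cache_path, "w") as fh:
                fh.write("payload")
            os.link(cache_path, exempt_path)  # Second link to the same data.
            os.unlink(cache_path)  # Expiry removes the main cache entry.
            with open(exempt_path) as fh:
                assert fh.read() == "payload"  # Data is still readable.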

    def testCacheExpiryDatasets(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
                self.assertTrue(cache_manager.known_to_cache(component_ref))
                self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
                self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))

        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files.

        # Write two new non-composite datasets; the number of files should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self):
        threshold = 55  # Each file is 10 bytes, so 6 files survive expiry.
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(self, cache_manager, n_datasets, n_retained):
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)
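
    # Worked example (illustrative) of the retention arithmetic above:
    # with n_datasets=5 and n_retained=3, the survivors are the indices
    # satisfying i >= 5 - 3, i.e. refs 2, 3 and 4, matching the
    # "Should end with datasets: 2, 3, 4" comment in testCacheExpiryFiles.
    @staticmethod
    def _example_retained_indices(n_datasets, n_retained):
        return [i for i in range(n_datasets) if i >= n_datasets - n_retained]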

    def testCacheExpiryAge(self):
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep past the age threshold, then insert
        # 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting.
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)


class DatasetRefURIsTestCase(unittest.TestCase):
    """Tests for DatasetRefURIs."""

    def testSequenceAccess(self):
        """Verify that DatasetRefURIs can be treated like a two-item tuple."""
        uris = DatasetRefURIs()

        self.assertEqual(len(uris), 2)
        self.assertIsNone(uris[0])
        self.assertEqual(uris[1], {})

        primaryURI = ResourcePath("1/2/3")
        componentURI = ResourcePath("a/b/c")

        # Affirm that DatasetRefURIs does not support MutableSequence
        # functions.
        with self.assertRaises(TypeError):
            uris[0] = primaryURI
        with self.assertRaises(TypeError):
            uris[1] = {"foo": componentURI}

        # But DatasetRefURIs can be set by property name:
        uris.primaryURI = primaryURI
        uris.componentURIs = {"foo": componentURI}
        self.assertEqual(uris.primaryURI, primaryURI)
        self.assertEqual(uris[0], primaryURI)

        primary, components = uris
        self.assertEqual(primary, primaryURI)
        self.assertEqual(components, {"foo": componentURI})

    def testRepr(self):
        """Verify __repr__ output."""
        uris = DatasetRefURIs(ResourcePath("1/2/3"), {"comp": ResourcePath("a/b/c")})
        self.assertEqual(
            repr(uris),
            f'DatasetRefURIs(ResourcePath("{os.getcwd()}/1/2/3"), '
            "{'comp': ResourcePath(\"" + os.getcwd() + '/a/b/c")})',
        )

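
# A minimal usage sketch (not exercised by the tests): since DatasetRefURIs
# behaves like a two-item tuple, callers can keep tuple unpacking while
# newer code reads the named properties. The function name is an invention
# for this sketch.
def _example_unpack_uris(uris: DatasetRefURIs):
    primary, components = uris  # Tuple-style unpacking still works.
    assert primary == uris.primaryURI
    assert components == uris.componentURIs
    return primary, components
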

class DataIdForTestTestCase(unittest.TestCase):
    """Tests for the DataIdForTest class."""

    def testImmutable(self):
        """Verify that an instance is immutable by default."""
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        initial_hash = hash(dataId)

        with self.assertRaises(RuntimeError):
            dataId["instrument"] = "foo"

        with self.assertRaises(RuntimeError):
            del dataId["instrument"]

        assert sys.version_info[0] == 3
        if sys.version_info[1] >= 9:
            with self.assertRaises(RuntimeError):
                dataId |= dict(foo="bar")

        with self.assertRaises(RuntimeError):
            dataId.pop("instrument")

        with self.assertRaises(RuntimeError):
            dataId.popitem()

        with self.assertRaises(RuntimeError):
            dataId.update(dict(instrument="foo"))

        # Verify that the hash value has not changed.
        self.assertEqual(initial_hash, hash(dataId))

    def testMutable(self):
        """Verify that an instance can be made mutable (unfrozen)."""
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        initial_hash = hash(dataId)
        dataId.frozen = False
        self.assertEqual(initial_hash, hash(dataId))

        dataId["instrument"] = "foo"
        self.assertEqual(dataId["instrument"], "foo")
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        del dataId["instrument"]
        self.assertNotIn("instrument", dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        assert sys.version_info[0] == 3
        if sys.version_info[1] >= 9:
            dataId |= dict(foo="bar")
            self.assertEqual(dataId["foo"], "bar")
            self.assertNotEqual(initial_hash, hash(dataId))
            initial_hash = hash(dataId)

        dataId.pop("visit")
        self.assertNotIn("visit", dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        dataId.popitem()
        self.assertNotIn("physical_filter", dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        dataId.update(dict(instrument="foo"))
        self.assertEqual(dataId["instrument"], "foo")
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

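
# A minimal sketch (not used by the tests) of the thaw/mutate/refreeze
# pattern the two test cases above rely on, accepting that the hash changes
# along with the contents. The helper name is an invention for this sketch.
def _example_thaw_and_update(data_id: DataIdForTest, **updates):
    data_id.frozen = False
    data_id.update(updates)
    data_id.frozen = True
    return hash(data_id)  # New hash reflecting the mutated contents.
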

if __name__ == "__main__":
    unittest.main()