# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

21 

22import os 

23import shutil 

24import sys 

25import tempfile 

26import time 

27import unittest 

28from collections import UserDict 

29from dataclasses import dataclass 

30 

31import lsst.utils.tests 

32import yaml 

33from lsst.daf.butler import ( 

34 Config, 

35 DatasetRefURIs, 

36 DatasetTypeNotSupportedError, 

37 DatastoreCacheManager, 

38 DatastoreCacheManagerConfig, 

39 DatastoreConfig, 

40 DatastoreDisabledCacheManager, 

41 DatastoreValidationError, 

42 DimensionUniverse, 

43 FileDataset, 

44 NamedKeyDict, 

45 StorageClass, 

46 StorageClassFactory, 

47) 

48from lsst.daf.butler.formatters.yaml import YamlFormatter 

49from lsst.daf.butler.tests import ( 

50 BadNoWriteFormatter, 

51 BadWriteFormatter, 

52 DatasetTestHelper, 

53 DatastoreTestHelper, 

54 DummyRegistry, 

55 MetricsExample, 

56) 

57from lsst.resources import ResourcePath 

58from lsst.utils import doImport 

59 

60TESTDIR = os.path.dirname(__file__) 


class DataIdForTest(UserDict):
    """A dict-like class that can be used for a DataId dict that is hashable.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before Python 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
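

# Illustrative semantics of DataIdForTest (comment only, not executed):
#
#     dataId = DataIdForTest({"instrument": "dummy", "visit": 52})
#     dataId["visit"] = 53      # raises RuntimeError: the dict is frozen
#     dataId.frozen = False
#     dataId["visit"] = 53      # now allowed, but hash(dataId) changes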


def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
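

# The three positional arguments above populate the ``summary``, ``output``
# and ``data`` components of MetricsExample, which the composite tests in
# this file read back individually.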


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
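

# Example construction (as used in prepDeleteTest below):
#
#     dataId = FakeDataCoordinate.from_dict(
#         {"instrument": "dummy", "visit": 638, "physical_filter": "U"}
#     )
#
# Keys are wrapped in Named so that NamedKeyDict sees objects with a
# ``name`` attribute; freeze() plus __hash__ make the result usable
# anywhere a hashable DataCoordinate is expected.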


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
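

# Subclasses of DatastoreTestsBase are expected to define ``configFile``
# plus the capability flags used by the tests, e.g. ``uriScheme``,
# ``ingestTransferModes``, ``isEphemeral``, ``rootKeys``,
# ``canIngestNoTransferAuto`` and ``validationCanFail`` (see the concrete
# TestCase classes below).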


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)
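
    # Note: datastores flag predicted (not yet existing) locations with a
    # "#predicted" URI fragment; several tests above and below assert on it.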

    def testTrustGetRequest(self):
        """Check that we can get datasets that the registry knows nothing about."""

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry so that only trust mode can find
            # the dataset from now on.
            datastore.removeStoredItemInfo(ref)

            # While not trusting, check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
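
    # Callers unpack the returned tuple, e.g.:
    #     datastore, ref = self.prepDeleteTest()
    #     datastore, *refs = self.prepDeleteTest(n_refs=10)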

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make
        # os.path.exists return False but then the new symlink will fail
        # with FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):

            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store
                    datastore.remove(ref)

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""

        datastore = self.makeDatastore("test_datastore")

        # For now only the FileDatastore can be used for this test.
        # A ChainedDatastore that only includes InMemoryDatastores has to
        # be skipped as well.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")
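
        # (The ``else`` clause of the for loop above runs only when the
        # loop completed without ``break``, i.e. when every child
        # datastore name starts with "InMemoryDatastore".)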

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)

        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self):
        """Test that we write to a temporary file and then rename it."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location."""

        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove the dataset and put it back with checksums explicitly
        # enabled
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record and trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails and a formatter that fails but leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
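            # (Each ``accept`` entry above is a tuple of expected presence
            # flags, one per child datastore in the chain.)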

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores means InMemory at the moment
                            # and that does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)
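
    # The cache manager is configured from a YAML string with a top-level
    # ``cached`` section; the tests below exercise the ``root``, ``default``,
    # ``cacheable`` and ``expiry`` keys.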

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
"""
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use default caching status and set metric1 to false"""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
"""
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
"""
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
"""

1422 def testCacheExpiryFiles(self): 

1423 threshold = 2 # Keep at least 2 files. 

1424 mode = "files" 

1425 config_str = self._expiration_config(mode, threshold) 

1426 

1427 cache_manager = self._make_cache_manager(config_str) 

1428 

1429 # Check that an empty cache returns unknown for arbitrary ref 

1430 self.assertFalse(cache_manager.known_to_cache(self.refs[0])) 

1431 

1432 # Should end with datasets: 2, 3, 4 

1433 self.assertExpiration(cache_manager, 5, threshold + 1) 

1434 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1435 

1436 # Check that we will not expire a file that is actively in use. 

1437 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1438 self.assertIsNotNone(found) 

1439 

1440 # Trigger cache expiration that should remove the file 

1441 # we just retrieved. Should now have: 3, 4, 5 

1442 cached = cache_manager.move_to_cache(self.files[5], self.refs[5]) 

1443 self.assertIsNotNone(cached) 

1444 

1445 # Cache should still report the standard file count. 

1446 self.assertEqual(cache_manager.file_count, threshold + 1) 

1447 

1448 # Add additional entry to cache. 

1449 # Should now have 4, 5, 6 

1450 cached = cache_manager.move_to_cache(self.files[6], self.refs[6]) 

1451 self.assertIsNotNone(cached) 

1452 

1453 # Is the file still there? 

1454 self.assertTrue(found.exists()) 

1455 

1456 # Can we read it? 

1457 data = found.read() 

1458 self.assertGreater(len(data), 0) 

1459 

1460 # Outside context the file should no longer exist. 

1461 self.assertFalse(found.exists()) 

1462 

1463 # File count should not have changed. 

1464 self.assertEqual(cache_manager.file_count, threshold + 1) 

1465 

1466 # Dataset 2 was in the exempt directory but because hardlinks 

1467 # are used it was deleted from the main cache during cache expiry 

1468 # above and so should no longer be found. 

1469 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1470 self.assertIsNone(found) 

1471 

1472 # And the one stored after it is also gone. 

1473 with cache_manager.find_in_cache(self.refs[3], ".txt") as found: 

1474 self.assertIsNone(found) 

1475 

1476 # But dataset 4 is present. 

1477 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1478 self.assertIsNotNone(found) 

1479 

1480 # Adding a new dataset triggers another expiry; dataset 2 stays absent. 

1481 cache_manager.move_to_cache(self.files[7], self.refs[7]) 

1482 

1483 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1484 self.assertIsNone(found) 

1485 
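# ----------------------------------------------------------------------
# Editor's sketch (not part of the test suite): standard-library
# behaviour behind the hard-link comment in testCacheExpiryFiles above.
# A hard link is a second directory entry for the same inode, so
# removing the original path leaves the linked copy readable, while a
# cache that tracks only the original path can no longer find it.
#
#   import os, tempfile
#
#   d = tempfile.mkdtemp()
#   original = os.path.join(d, "cached.txt")
#   with open(original, "w") as f:
#       f.write("payload")
#   exempt = os.path.join(d, "exempt.txt")
#   os.link(original, exempt)  # second name for the same inode
#   os.remove(original)        # "expire" the main-cache entry
#   assert not os.path.exists(original)
#   assert open(exempt).read() == "payload"  # data survives via the link
# ----------------------------------------------------------------------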

1486 def testCacheExpiryDatasets(self): 

1487 threshold = 2 # Keep 2 datasets. 

1488 mode = "datasets" 

1489 config_str = self._expiration_config(mode, threshold) 

1490 

1491 cache_manager = self._make_cache_manager(config_str) 

1492 self.assertExpiration(cache_manager, 5, threshold + 1) 

1493 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1494 

1495 def testCacheExpiryDatasetsComposite(self): 

1496 threshold = 2 # Keep 2 datasets. 

1497 mode = "datasets" 

1498 config_str = self._expiration_config(mode, threshold) 

1499 

1500 cache_manager = self._make_cache_manager(config_str) 

1501 

1502 n_datasets = 3 

1503 for i in range(n_datasets): 

1504 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]): 

1505 cached = cache_manager.move_to_cache(component_file, component_ref) 

1506 self.assertIsNotNone(cached) 

1507 self.assertTrue(cache_manager.known_to_cache(component_ref)) 

1508 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef())) 

1509 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension())) 

1510 

1511 self.assertEqual(cache_manager.file_count, 6) # 2 retained datasets x 3 component files each 

1512 

1513 # Write two new non-composite datasets; the file count should drop. 

1514 self.assertExpiration(cache_manager, 2, 5) 

1515 

1516 def testCacheExpirySize(self): 

1517 threshold = 55 # Each file is 10 bytes 

1518 mode = "size" 

1519 config_str = self._expiration_config(mode, threshold) 

1520 

1521 cache_manager = self._make_cache_manager(config_str) 

1522 self.assertExpiration(cache_manager, 10, 6) 

1523 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1524 
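# ----------------------------------------------------------------------
# Editor's note (an inference from the expected counts in these tests,
# not a documented contract): in each mode the survivor count is
# "whatever fits inside the threshold, plus the insertion that triggered
# the expiry". For size mode with 10-byte files:
#
#   >>> threshold, file_size = 55, 10
#   >>> threshold // file_size + 1  # expected cache_manager.file_count
#   6
# ----------------------------------------------------------------------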

1525 def assertExpiration(self, cache_manager, n_datasets, n_retained): 

1526 """Insert the datasets and then check the number retained.""" 

1527 for i in range(n_datasets): 

1528 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1529 self.assertIsNotNone(cached) 

1530 

1531 self.assertEqual(cache_manager.file_count, n_retained) 

1532 

1533 # The oldest files should no longer be in the cache. 

1534 for i in range(n_datasets): 

1535 with cache_manager.find_in_cache(self.refs[i], ".txt") as found: 

1536 if i >= n_datasets - n_retained: 

1537 self.assertIsInstance(found, ResourcePath) 

1538 else: 

1539 self.assertIsNone(found) 

1540 
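# ----------------------------------------------------------------------
# Editor's sketch (restates the loop condition in assertExpiration):
# insertion order doubles as age order, so the survivors are always the
# last n_retained insertions. For testCacheExpiryFiles (5 inserted,
# 3 retained) this reproduces the "datasets: 2, 3, 4" comment above:
#
#   >>> n_datasets, n_retained = 5, 3
#   >>> [i for i in range(n_datasets) if i >= n_datasets - n_retained]
#   [2, 3, 4]
# ----------------------------------------------------------------------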

1541 def testCacheExpiryAge(self): 

1542 threshold = 1 # Expire files older than 1 second. 

1543 mode = "age" 

1544 config_str = self._expiration_config(mode, threshold) 

1545 

1546 cache_manager = self._make_cache_manager(config_str) 

1547 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1548 

1549 # Insert 2 files, then sleep, then insert 4 more. 

1550 for i in range(2): 

1551 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1552 self.assertIsNotNone(cached) 

1553 time.sleep(2.0) 

1554 for j in range(4): 

1555 i = 2 + j # Continue the counting 

1556 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1557 self.assertIsNotNone(cached) 

1558 

1559 # Only the files written after the sleep should exist. 

1560 self.assertEqual(cache_manager.file_count, 4) 

1561 with cache_manager.find_in_cache(self.refs[1], ".txt") as found: 

1562 self.assertIsNone(found) 

1563 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1564 self.assertIsInstance(found, ResourcePath) 

1565 

1566 

1567class DatasetRefURIsTestCase(unittest.TestCase): 

1568 """Tests for DatasetRefURIs.""" 

1569 

1570 def testSequenceAccess(self): 

1571 """Verify that DatasetRefURIs can be treated like a two-item tuple.""" 

1572 uris = DatasetRefURIs() 

1573 

1574 self.assertEqual(len(uris), 2) 

1575 self.assertIsNone(uris[0]) 

1576 self.assertEqual(uris[1], {}) 

1577 

1578 primaryURI = ResourcePath("1/2/3") 

1579 componentURI = ResourcePath("a/b/c") 

1580 

1581 # Affirm that DatasetRefURIs does not support MutableSequence item assignment. 

1582 with self.assertRaises(TypeError): 

1583 uris[0] = primaryURI 

1584 with self.assertRaises(TypeError): 

1585 uris[1] = {"foo": componentURI} 

1586 

1587 # But the URIs can be set via the named properties: 

1588 uris.primaryURI = primaryURI 

1589 uris.componentURIs = {"foo": componentURI} 

1590 self.assertEqual(uris.primaryURI, primaryURI) 

1591 self.assertEqual(uris[0], primaryURI) 

1592 

1593 primary, components = uris 

1594 self.assertEqual(primary, primaryURI) 

1595 self.assertEqual(components, {"foo": componentURI}) 

1596 
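# ----------------------------------------------------------------------
# Editor's sketch (usage pattern exercised by the test above): because
# DatasetRefURIs is a fixed two-item sequence, callers can unpack it
# exactly like the (primary, components) tuple it models:
#
#   >>> uris = DatasetRefURIs(ResourcePath("1/2/3"), {})
#   >>> primary, components = uris
#   >>> components
#   {}
# ----------------------------------------------------------------------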

1597 def testRepr(self): 

1598 """Verify __repr__ output.""" 

1599 uris = DatasetRefURIs(ResourcePath("1/2/3"), {"comp": ResourcePath("a/b/c")}) 

1600 self.assertEqual( 

1601 repr(uris), 

1602 f'DatasetRefURIs(ResourcePath("{os.getcwd()}/1/2/3"), ' 

1603 "{'comp': ResourcePath(\"" + os.getcwd() + '/a/b/c")})', 

1604 ) 

1605 

1606 

1607class DataIdForTestTestCase(unittest.TestCase): 

1608 """Tests for the DataIdForTest class.""" 

1609 

1610 def testImmutable(self): 

1611 """Verify that an instance is immutable by default.""" 

1612 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"}) 

1613 initial_hash = hash(dataId) 

1614 

1615 with self.assertRaises(RuntimeError): 

1616 dataId["instrument"] = "foo" 

1617 

1618 with self.assertRaises(RuntimeError): 

1619 del dataId["instrument"] 

1620 

1621 assert sys.version_info[0] == 3 

1622 if sys.version_info[1] >= 9: 

1623 with self.assertRaises(RuntimeError): 

1624 dataId |= dict(foo="bar") 

1625 

1626 with self.assertRaises(RuntimeError): 

1627 dataId.pop("instrument") 

1628 

1629 with self.assertRaises(RuntimeError): 

1630 dataId.popitem() 

1631 

1632 with self.assertRaises(RuntimeError): 

1633 dataId.update(dict(instrument="foo")) 

1634 

1635 # Verify that the hash value has not changed. 

1636 self.assertEqual(initial_hash, hash(dataId)) 

1637 

1638 def testMutable(self): 

1639 """Verify that an instance can be made mutable (unfrozen).""" 

1640 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"}) 

1641 initial_hash = hash(dataId) 

1642 dataId.frozen = False 

1643 self.assertEqual(initial_hash, hash(dataId)) 

1644 

1645 dataId["instrument"] = "foo" 

1646 self.assertEqual(dataId["instrument"], "foo") 

1647 self.assertNotEqual(initial_hash, hash(dataId)) 

1648 initial_hash = hash(dataId) 

1649 

1650 del dataId["instrument"] 

1651 self.assertNotIn("instrument", dataId) 

1652 self.assertNotEqual(initial_hash, hash(dataId)) 

1653 initial_hash = hash(dataId) 

1654 

1655 assert sys.version_info[0] == 3 

1656 if sys.version_info[1] >= 9: 

1657 dataId |= dict(foo="bar") 

1658 self.assertEqual(dataId["foo"], "bar") 

1659 self.assertNotEqual(initial_hash, hash(dataId)) 

1660 initial_hash = hash(dataId) 

1661 

1662 dataId.pop("visit") 

1663 self.assertNotIn("visit", dataId) 

1664 self.assertNotEqual(initial_hash, hash(dataId)) 

1665 initial_hash = hash(dataId) 

1666 

1667 dataId.popitem() 

1668 self.assertNotIn("physical_filter", dataId) 

1669 self.assertNotEqual(initial_hash, hash(dataId)) 

1670 initial_hash = hash(dataId) 

1671 

1672 dataId.update(dict(instrument="foo")) 

1673 self.assertEqual(dataId["instrument"], "foo") 

1674 self.assertNotEqual(initial_hash, hash(dataId)) 

1675 initial_hash = hash(dataId) 

1676 
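# ----------------------------------------------------------------------
# Editor's sketch (illustrative only): the hashability exercised above
# is what lets a frozen DataIdForTest act as a mapping key; equal
# contents give equal hashes, so a fresh instance with the same data
# finds the entry:
#
#   >>> key = DataIdForTest({"instrument": "dummy", "visit": 52})
#   >>> lookup = {key: "dataset"}
#   >>> lookup[DataIdForTest({"instrument": "dummy", "visit": 52})]
#   'dataset'
# ----------------------------------------------------------------------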

1677 

1678if __name__ == "__main__": 1678 ↛ 1679 (line 1678 didn't jump to line 1679 because the condition on line 1678 was never true)

1679 unittest.main()