Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["RegistryTests"] 

24 

25from abc import ABC, abstractmethod 

26from datetime import datetime 

27 

28import sqlalchemy 

29 

30from ...core import ( 

31 DataCoordinate, 

32 DatasetType, 

33 DimensionGraph, 

34 StorageClass, 

35 ddl, 

36) 

37from .._registry import Registry, ConflictingDefinitionError, OrphanedRecordError 

38 

39 

40class RegistryTests(ABC): 

41 """Generic tests for the `Registry` class that can be subclassed to 

42 generate tests for different configurations. 

43 """ 

44 

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return a fresh, empty `Registry` for a single test.

        Subclasses must override this to construct a `Registry` backed by
        the particular configuration/database being tested.
        """
        raise NotImplementedError()

48 

    def assertRowCount(self, registry: Registry, table: str, count: int):
        """Check the number of rows in table.

        Parameters
        ----------
        registry : `Registry`
            Registry whose underlying database is queried directly.
        table : `str`
            Name of an attribute on the private ``registry._tables``
            namespace identifying the table to count.
        count : `int`
            Expected number of rows.
        """
        # TODO: all tests that rely on this method should be rewritten, as it
        # needs to depend on Registry implementation details to have any chance
        # of working.
        # Emits SELECT COUNT(*) FROM <table> via the registry's private
        # database handle.
        sql = sqlalchemy.sql.select(
            [sqlalchemy.sql.func.count()]
        ).select_from(
            getattr(registry._tables, table)
        )
        self.assertEqual(registry._db.query(sql).scalar(), count)

61 

62 def testOpaque(self): 

63 """Tests for `Registry.registerOpaqueTable`, 

64 `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and 

65 `Registry.deleteOpaqueData`. 

66 """ 

67 registry = self.makeRegistry() 

68 table = "opaque_table_for_testing" 

69 registry.registerOpaqueTable( 

70 table, 

71 spec=ddl.TableSpec( 

72 fields=[ 

73 ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True), 

74 ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False), 

75 ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True), 

76 ], 

77 ) 

78 ) 

79 rows = [ 

80 {"id": 1, "name": "one", "count": None}, 

81 {"id": 2, "name": "two", "count": 5}, 

82 {"id": 3, "name": "three", "count": 6}, 

83 ] 

84 registry.insertOpaqueData(table, *rows) 

85 self.assertCountEqual(rows, list(registry.fetchOpaqueData(table))) 

86 self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1))) 

87 self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two"))) 

88 self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two"))) 

89 registry.deleteOpaqueData(table, id=3) 

90 self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table))) 

91 registry.deleteOpaqueData(table) 

92 self.assertEqual([], list(registry.fetchOpaqueData(table))) 

93 

94 def testDatasetType(self): 

95 """Tests for `Registry.registerDatasetType` and 

96 `Registry.getDatasetType`. 

97 """ 

98 registry = self.makeRegistry() 

99 # Check valid insert 

100 datasetTypeName = "test" 

101 storageClass = StorageClass("testDatasetType") 

102 registry.storageClasses.registerStorageClass(storageClass) 

103 dimensions = registry.dimensions.extract(("instrument", "visit")) 

104 differentDimensions = registry.dimensions.extract(("instrument", "patch")) 

105 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

106 # Inserting for the first time should return True 

107 self.assertTrue(registry.registerDatasetType(inDatasetType)) 

108 outDatasetType1 = registry.getDatasetType(datasetTypeName) 

109 self.assertEqual(outDatasetType1, inDatasetType) 

110 

111 # Re-inserting should work 

112 self.assertFalse(registry.registerDatasetType(inDatasetType)) 

113 # Except when they are not identical 

114 with self.assertRaises(ConflictingDefinitionError): 

115 nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass) 

116 registry.registerDatasetType(nonIdenticalDatasetType) 

117 

118 # Template can be None 

119 datasetTypeName = "testNoneTemplate" 

120 storageClass = StorageClass("testDatasetType2") 

121 registry.storageClasses.registerStorageClass(storageClass) 

122 dimensions = registry.dimensions.extract(("instrument", "visit")) 

123 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

124 registry.registerDatasetType(inDatasetType) 

125 outDatasetType2 = registry.getDatasetType(datasetTypeName) 

126 self.assertEqual(outDatasetType2, inDatasetType) 

127 

128 allTypes = registry.getAllDatasetTypes() 

129 self.assertEqual(allTypes, {outDatasetType1, outDatasetType2}) 

130 

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData` and
        `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted; toDict()
        # round-trips the stored record back to the input mapping.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # abstract_filter doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("abstract_filter", {"abstract_filter": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "abstract_filter": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )

174 

175 def testDataset(self): 

176 """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`, 

177 and `Registry.removeDataset`. 

178 """ 

179 registry = self.makeRegistry() 

180 run = "test" 

181 registry.registerRun(run) 

182 storageClass = StorageClass("testDataset") 

183 registry.storageClasses.registerStorageClass(storageClass) 

184 datasetType = DatasetType(name="testtype", dimensions=registry.dimensions.extract(("instrument",)), 

185 storageClass=storageClass) 

186 registry.registerDatasetType(datasetType) 

187 dataId = {"instrument": "DummyCam"} 

188 registry.insertDimensionData("instrument", dataId) 

189 ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run) 

190 outRef = registry.getDataset(ref.id) 

191 self.assertIsNotNone(ref.id) 

192 self.assertEqual(ref, outRef) 

193 with self.assertRaises(ConflictingDefinitionError): 

194 registry.insertDatasets(datasetType, dataIds=[dataId], run=run) 

195 registry.removeDataset(ref) 

196 self.assertIsNone(registry.find(run, datasetType, dataId)) 

197 

198 def testComponents(self): 

199 """Tests for `Registry.attachComponent` and other dataset operations 

200 on composite datasets. 

201 """ 

202 registry = self.makeRegistry() 

203 childStorageClass = StorageClass("testComponentsChild") 

204 registry.storageClasses.registerStorageClass(childStorageClass) 

205 parentStorageClass = StorageClass("testComponentsParent", 

206 components={"child1": childStorageClass, 

207 "child2": childStorageClass}) 

208 registry.storageClasses.registerStorageClass(parentStorageClass) 

209 parentDatasetType = DatasetType(name="parent", 

210 dimensions=registry.dimensions.extract(("instrument",)), 

211 storageClass=parentStorageClass) 

212 childDatasetType1 = DatasetType(name="parent.child1", 

213 dimensions=registry.dimensions.extract(("instrument",)), 

214 storageClass=childStorageClass) 

215 childDatasetType2 = DatasetType(name="parent.child2", 

216 dimensions=registry.dimensions.extract(("instrument",)), 

217 storageClass=childStorageClass) 

218 registry.registerDatasetType(parentDatasetType) 

219 registry.registerDatasetType(childDatasetType1) 

220 registry.registerDatasetType(childDatasetType2) 

221 dataId = {"instrument": "DummyCam"} 

222 registry.insertDimensionData("instrument", dataId) 

223 run = "test" 

224 registry.registerRun(run) 

225 parent, = registry.insertDatasets(parentDatasetType, dataIds=[dataId], run=run) 

226 children = {"child1": registry.insertDatasets(childDatasetType1, dataIds=[dataId], run=run)[0], 

227 "child2": registry.insertDatasets(childDatasetType2, dataIds=[dataId], run=run)[0]} 

228 for name, child in children.items(): 

229 registry.attachComponent(name, parent, child) 

230 self.assertEqual(parent.components, children) 

231 outParent = registry.getDataset(parent.id) 

232 self.assertEqual(outParent.components, children) 

233 # Remove the parent; this should remove both children. 

234 registry.removeDataset(parent) 

235 self.assertIsNone(registry.find(run, parentDatasetType, dataId)) 

236 self.assertIsNone(registry.find(run, childDatasetType1, dataId)) 

237 self.assertIsNone(registry.find(run, childDatasetType2, dataId)) 

238 

    def testFind(self):
        """Tests for `Registry.find`.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testFind")
        registry.storageClasses.registerStorageClass(storageClass)
        datasetType = DatasetType(name="dummytype",
                                  dimensions=registry.dimensions.extract(("instrument", "visit")),
                                  storageClass=storageClass)
        registry.registerDatasetType(datasetType)
        # Two instruments, one filter each; visit id 2 deliberately exists
        # for both instruments so we can check that find() does not confuse
        # records from different instruments.
        registry.insertDimensionData("instrument",
                                     {"instrument": "DummyCam"},
                                     {"instrument": "MyCam"})
        registry.insertDimensionData("physical_filter",
                                     {"instrument": "DummyCam", "physical_filter": "d-r",
                                      "abstract_filter": "r"},
                                     {"instrument": "MyCam", "physical_filter": "m-r",
                                      "abstract_filter": "r"})
        registry.insertDimensionData("visit",
                                     {"instrument": "DummyCam", "id": 0, "name": "zero",
                                      "physical_filter": "d-r"},
                                     {"instrument": "DummyCam", "id": 1, "name": "one",
                                      "physical_filter": "d-r"},
                                     {"instrument": "DummyCam", "id": 2, "name": "two",
                                      "physical_filter": "d-r"},
                                     {"instrument": "MyCam", "id": 2, "name": "two",
                                      "physical_filter": "m-r"})
        run = "test"
        dataId = {"instrument": "DummyCam", "visit": 0, "physical_filter": "d-r", "abstract_filter": None}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.find(run, datasetType, dataId)
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "DummyCam", "abstract_filter": "g"}  # should be visit
            registry.find(run, datasetType, dataId)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "DummyCam", "visit": 1, "physical_filter": "d-r", "abstract_filter": None}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "DummyCam", "visit": 2, "physical_filter": "d-r", "abstract_filter": None}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        dataId3 = {"instrument": "MyCam", "visit": 2, "physical_filter": "m-r", "abstract_filter": None}
        inputRef3, = registry.insertDatasets(datasetType, dataIds=[dataId3], run=run)
        self.assertEqual(registry.find(run, datasetType, dataId1), inputRef1)
        self.assertEqual(registry.find(run, datasetType, dataId2), inputRef2)
        self.assertEqual(registry.find(run, datasetType, dataId3), inputRef3)
        self.assertNotEqual(registry.find(run, datasetType, dataId1), inputRef2)
        self.assertNotEqual(registry.find(run, datasetType, dataId2), inputRef1)
        self.assertNotEqual(registry.find(run, datasetType, dataId3), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "DummyCam", "visit": 42}
        self.assertIsNone(registry.find(run, datasetType, nonExistingDataId))

292 

    def testCollections(self):
        """Tests for `Registry.getAllCollections`, `Registry.registerRun`,
        `Registry.disassociate`, and interactions between collections and
        `Registry.find`.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testCollections")
        registry.storageClasses.registerStorageClass(storageClass)
        datasetType = DatasetType(name="dummytype",
                                  dimensions=registry.dimensions.extract(("instrument", "visit")),
                                  storageClass=storageClass)
        registry.registerDatasetType(datasetType)
        registry.insertDimensionData("instrument", {"instrument": "DummyCam"})
        registry.insertDimensionData("physical_filter", {"instrument": "DummyCam", "physical_filter": "d-r",
                                                         "abstract_filter": "R"})
        registry.insertDimensionData("visit", {"instrument": "DummyCam", "id": 0, "name": "zero",
                                               "physical_filter": "d-r"})
        registry.insertDimensionData("visit", {"instrument": "DummyCam", "id": 1, "name": "one",
                                               "physical_filter": "d-r"})
        run = "ingest"
        registry.registerRun(run)
        # Dataset.physical_filter should be populated as well here from the
        # visit Dimension values.
        dataId1 = {"instrument": "DummyCam", "visit": 0}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "DummyCam", "visit": 1}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        # We should be able to find both datasets in their run
        outputRef = registry.find(run, datasetType, dataId1)
        self.assertEqual(outputRef, inputRef1)
        outputRef = registry.find(run, datasetType, dataId2)
        self.assertEqual(outputRef, inputRef2)
        # and with the associated collection
        newCollection = "something"
        registry.associate(newCollection, [inputRef1, inputRef2])
        outputRef = registry.find(newCollection, datasetType, dataId1)
        self.assertEqual(outputRef, inputRef1)
        outputRef = registry.find(newCollection, datasetType, dataId2)
        self.assertEqual(outputRef, inputRef2)
        # but no more after disassociation; only the disassociated ref
        # disappears from the collection, the other remains findable.
        registry.disassociate(newCollection, [inputRef1, ])
        self.assertIsNone(registry.find(newCollection, datasetType, dataId1))
        outputRef = registry.find(newCollection, datasetType, dataId2)
        self.assertEqual(outputRef, inputRef2)
        # Both the run and the tagged collection are reported, even though
        # "something" no longer contains inputRef1.
        collections = registry.getAllCollections()
        self.assertEqual(collections, {"something", "ingest"})

339 

    def testAssociate(self):
        """Tests for `Registry.associate`.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testAssociate")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        datasetType1 = DatasetType(name="dummytype", dimensions=dimensions, storageClass=storageClass)
        registry.registerDatasetType(datasetType1)
        datasetType2 = DatasetType(name="smartytype", dimensions=dimensions, storageClass=storageClass)
        registry.registerDatasetType(datasetType2)
        registry.insertDimensionData("instrument", {"instrument": "DummyCam"})
        registry.insertDimensionData("physical_filter", {"instrument": "DummyCam", "physical_filter": "d-r",
                                                         "abstract_filter": "R"})
        registry.insertDimensionData("visit", {"instrument": "DummyCam", "id": 0, "name": "zero",
                                               "physical_filter": "d-r"})
        registry.insertDimensionData("visit", {"instrument": "DummyCam", "id": 1, "name": "one",
                                               "physical_filter": "d-r"})
        run1 = "ingest1"
        registry.registerRun(run1)
        run2 = "ingest2"
        registry.registerRun(run2)
        run3 = "ingest3"
        registry.registerRun(run3)
        # Dataset.physical_filter should be populated as well here
        # from the visit Dimension values.
        dataId1 = {"instrument": "DummyCam", "visit": 0}
        dataId2 = {"instrument": "DummyCam", "visit": 1}
        ref1_run1, ref2_run1 = registry.insertDatasets(datasetType1, dataIds=[dataId1, dataId2], run=run1)
        ref1_run2, ref2_run2 = registry.insertDatasets(datasetType2, dataIds=[dataId1, dataId2], run=run2)
        ref1_run3, ref2_run3 = registry.insertDatasets(datasetType2, dataIds=[dataId1, dataId2], run=run3)
        for ref in (ref1_run1, ref2_run1, ref1_run2, ref2_run2, ref1_run3, ref2_run3):
            self.assertEqual(ref.dataId.records["visit"].physical_filter, "d-r")
            self.assertEqual(ref.dataId.records["physical_filter"].abstract_filter, "R")
        # should have exactly 6 rows in dataset (3 runs times 2 data IDs)
        self.assertRowCount(registry, "dataset", 6)
        self.assertRowCount(registry, "dataset_collection", 6)
        # adding same DatasetRef to the same run is an error
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType1, dataIds=[dataId2], run=run1)
        # above exception must rollback and not add anything to Dataset
        self.assertRowCount(registry, "dataset", 6)
        self.assertRowCount(registry, "dataset_collection", 6)
        # associate refs from run1 with some other collection
        newCollection = "something"
        registry.associate(newCollection, [ref1_run1, ref2_run1])
        self.assertRowCount(registry, "dataset_collection", 8)
        # associating same exact DatasetRef is OK (not doing anything),
        # two cases to test - single-ref and many-refs
        registry.associate(newCollection, [ref1_run1])
        registry.associate(newCollection, [ref1_run1, ref2_run1])
        self.assertRowCount(registry, "dataset_collection", 8)
        # associate refs from run2 with same other collection, this should
        # be OK because they have different dataset type
        registry.associate(newCollection, [ref1_run2, ref2_run2])
        self.assertRowCount(registry, "dataset_collection", 10)
        # associating DatasetRef with the same units but different ID is not OK
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(newCollection, [ref1_run3])
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(newCollection, [ref1_run3, ref2_run3])

401 

402 def testDatasetLocations(self): 

403 """Tests for `Registry.insertDatasetLocations`, 

404 `Registry.getDatasetLocations`, and `Registry.removeDatasetLocations`. 

405 """ 

406 registry = self.makeRegistry() 

407 storageClass = StorageClass("testStorageInfo") 

408 registry.storageClasses.registerStorageClass(storageClass) 

409 datasetType = DatasetType(name="test", dimensions=registry.dimensions.extract(("instrument",)), 

410 storageClass=storageClass) 

411 datasetType2 = DatasetType(name="test2", dimensions=registry.dimensions.extract(("instrument",)), 

412 storageClass=storageClass) 

413 registry.registerDatasetType(datasetType) 

414 registry.registerDatasetType(datasetType2) 

415 registry.insertDimensionData("instrument", {"instrument": "DummyCam"}) 

416 run = "test" 

417 registry.registerRun(run) 

418 ref, = registry.insertDatasets(datasetType, dataIds=[{"instrument": "DummyCam"}], run=run) 

419 ref2, = registry.insertDatasets(datasetType2, dataIds=[{"instrument": "DummyCam"}], run=run) 

420 datastoreName = "dummystore" 

421 datastoreName2 = "dummystore2" 

422 # Test adding information about a new dataset 

423 registry.insertDatasetLocations(datastoreName, [ref]) 

424 addresses = registry.getDatasetLocations(ref) 

425 self.assertIn(datastoreName, addresses) 

426 self.assertEqual(len(addresses), 1) 

427 registry.insertDatasetLocations(datastoreName2, [ref, ref2]) 

428 addresses = registry.getDatasetLocations(ref) 

429 self.assertEqual(len(addresses), 2) 

430 self.assertIn(datastoreName, addresses) 

431 self.assertIn(datastoreName2, addresses) 

432 registry.removeDatasetLocation(datastoreName, ref) 

433 addresses = registry.getDatasetLocations(ref) 

434 self.assertEqual(len(addresses), 1) 

435 self.assertNotIn(datastoreName, addresses) 

436 self.assertIn(datastoreName2, addresses) 

437 with self.assertRaises(OrphanedRecordError): 

438 registry.removeDataset(ref) 

439 registry.removeDatasetLocation(datastoreName2, ref) 

440 addresses = registry.getDatasetLocations(ref) 

441 self.assertEqual(len(addresses), 0) 

442 self.assertNotIn(datastoreName2, addresses) 

443 registry.removeDataset(ref) # should not raise 

444 addresses = registry.getDatasetLocations(ref2) 

445 self.assertEqual(len(addresses), 1) 

446 self.assertIn(datastoreName2, addresses) 

447 

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument",))
        dataId = {"instrument": "DummyCam"}
        datasetTypeA = DatasetType(name="A",
                                   dimensions=dimensions,
                                   storageClass=storageClass)
        datasetTypeB = DatasetType(name="B",
                                   dimensions=dimensions,
                                   storageClass=storageClass)
        datasetTypeC = DatasetType(name="C",
                                   dimensions=dimensions,
                                   storageClass=storageClass)
        run = "test"
        registry.registerRun(run)
        refId = None
        # Outer transaction commits; the inner one is aborted by the
        # deliberately raised ValueError, which the assertRaises context
        # then swallows so the outer block completes normally.
        with registry.transaction():
            registry.registerDatasetType(datasetTypeA)
            with self.assertRaises(ValueError):
                with registry.transaction():
                    registry.registerDatasetType(datasetTypeB)
                    registry.registerDatasetType(datasetTypeC)
                    registry.insertDimensionData("instrument", {"instrument": "DummyCam"})
                    ref, = registry.insertDatasets(datasetTypeA, dataIds=[dataId], run=run)
                    refId = ref.id
                    raise ValueError("Oops, something went wrong")
        # A should exist
        self.assertEqual(registry.getDatasetType("A"), datasetTypeA)
        # But B and C should both not exist
        with self.assertRaises(KeyError):
            registry.getDatasetType("B")
        with self.assertRaises(KeyError):
            registry.getDatasetType("C")
        # And neither should the dataset
        self.assertIsNotNone(refId)
        self.assertIsNone(registry.getDataset(refId))
        # Or the Dimension entries
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "DummyCam"})

492 

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        # Flag proving the inner block executed past its first insert
        # before the conflict was raised.
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction():
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        # dataId1 survived the outer commit; dataId2 was rolled back with
        # the inner transaction.
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

521 

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        # NOTE(review): visit id=20 is named "twelve"; looks like a stale
        # fixture label but it is never asserted on, so it is harmless here.
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r"),
        )
        # Two exposures per visit.
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, name="100", visit=10, physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, name="101", visit=10, physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, name="110", visit=11, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, name="111", visit=11, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, name="200", visit=20, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, name="201", visit=20, physical_filter="dummy_r"),
        )
        # dataset types
        run1 = "test"
        run2 = "test2"
        registry.registerRun(run1)
        registry.registerRun(run2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both collections, 100 has
                # different dataset_id in different collections, for 101 only
                # single dataset_id exists
                if exposure == 100:
                    registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure == 101:
                    registry.associate(run2, [ref])
        # Add pre-existing datasets to second collection.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                registry.insertDatasets(rawType, dataIds=[dataId], run=run2)

        # Union of the dimensions required by RAW and CALEXP.
        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = list(registry.queryDimensions("visit", datasets={rawType: [run1]}, expand=True))
        rowsI = list(registry.queryDimensions(["visit"], datasets={rawType: [run1]}, expand=True))
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = list(registry.queryDimensions(dimensions, datasets={rawType: [run1]}, expand=True))
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
            # Packers for different dimension sets must round-trip their own
            # packed IDs and produce distinct packed values.
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = list(registry.queryDimensions(dimensions, datasets={rawType: [run2]}))
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = list(registry.queryDimensions(dimensions, datasets={rawType: [run1, run2]}))
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = list(registry.queryDimensions(dimensions, datasets={rawType: [run1]},
                                             where="visit = 10"))
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = list(registry.queryDimensions(dimensions, datasets={rawType: [run1]},
                                             where="visit = 10 and detector > 1"))
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression excludes everything
        rows = list(registry.queryDimensions(dimensions, datasets={rawType: [run1]},
                                             where="visit > 1000"))
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, this is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = list(registry.queryDimensions(dimensions, datasets={rawType: [run1]},
                                             where="physical_filter = 'dummy_r'"))
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

666 

def testSkyMapDimensions(self):
    """Tests involving only skymap dimensions, no joins to instrument.

    Populates a skymap with tracts and patches, registers three
    coadd-style dataset types, inserts datasets for a subset of
    tracts/patches/filters, and exercises ``queryDimensions`` with
    several ``where`` expressions.
    """
    registry = self.makeRegistry()

    # Need a bunch of dimensions and datasets for the test; we want
    # "abstract_filter" in the test so we also have to add
    # physical_filter dimensions (abstract_filter is backed by
    # physical_filter).
    registry.insertDimensionData(
        "instrument",
        dict(instrument="DummyCam")
    )
    registry.insertDimensionData(
        "physical_filter",
        dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
        dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
    )
    registry.insertDimensionData(
        "skymap",
        dict(name="DummyMap", hash="sha!".encode("utf8"))
    )
    for tract in range(10):
        registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
        registry.insertDimensionData(
            "patch",
            *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
              for patch in range(10)]
        )

    # Dataset types.
    run = "test"
    registry.registerRun(run)
    storageClass = StorageClass("testDataset")
    registry.storageClasses.registerStorageClass(storageClass)
    calexpType = DatasetType(name="deepCoadd_calexp",
                             dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                     "abstract_filter")),
                             storageClass=storageClass)
    registry.registerDatasetType(calexpType)
    mergeType = DatasetType(name="deepCoadd_mergeDet",
                            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                            storageClass=storageClass)
    registry.registerDatasetType(mergeType)
    measType = DatasetType(name="deepCoadd_meas",
                           dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                   "abstract_filter")),
                           storageClass=storageClass)
    registry.registerDatasetType(measType)

    dimensions = DimensionGraph(
        registry.dimensions,
        dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                    | measType.dimensions.required)
    )

    # Add pre-existing datasets: 3 tracts, 4 patches each, and for
    # calexp additionally 2 filters.
    for tract in (1, 3, 5):
        for patch in (2, 4, 6, 7):
            dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
            registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
            for aFilter in ("i", "r"):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch, abstract_filter=aFilter)
                registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

    # With empty expression.
    rows = list(registry.queryDimensions(dimensions,
                                         datasets={calexpType: [run], mergeType: [run]}))
    # NOTE: comment fixed — the data has 3 tracts (1, 3, 5), matching 3*4*2.
    self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
    for dataId in rows:
        self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "abstract_filter"))
    self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
    self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
    self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i", "r"))

    # Limit to 2 tracts and 2 patches.
    rows = list(registry.queryDimensions(dimensions,
                                         datasets={calexpType: [run], mergeType: [run]},
                                         where="tract IN (1, 5) AND patch IN (2, 7)"))
    self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
    self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
    self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
    self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i", "r"))

    # Limit to single filter.
    rows = list(registry.queryDimensions(dimensions,
                                         datasets={calexpType: [run], mergeType: [run]},
                                         where="abstract_filter = 'i'"))
    # NOTE: comment fixed — one filter survives the expression, matching 3*4*1.
    self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
    self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
    self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
    self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i",))

    # Expression excludes everything; specifying a non-existing skymap is
    # not a fatal error, it's operator error.
    rows = list(registry.queryDimensions(dimensions,
                                         datasets={calexpType: [run], mergeType: [run]},
                                         where="skymap = 'Mars'"))
    self.assertEqual(len(rows), 0)

764 

def testSpatialMatch(self):
    """Test involving spatial match using join tables.

    A realistic test would need a reasonably-defined skypix and regions
    in the registry tables, which is hard to set up in this simple test.
    We therefore insert no data at all, so every query returns an empty
    result, but this still provides coverage of the query-generation
    code.
    """
    registry = self.makeRegistry()

    # Register a run collection and a storage class for the dataset types.
    collection = "test"
    registry.registerRun(name=collection)
    storageClass = StorageClass("testDataset")
    registry.storageClasses.registerStorageClass(storageClass)

    # One instrument-based dataset type ...
    instrumentDims = registry.dimensions.extract(("instrument", "visit", "detector"))
    calexpType = DatasetType(name="CALEXP",
                             dimensions=instrumentDims,
                             storageClass=storageClass)
    registry.registerDatasetType(calexpType)

    # ... and one skymap-based dataset type, so relating them requires a
    # spatial join.
    skymapDims = registry.dimensions.extract(("skymap", "tract", "patch",
                                              "abstract_filter"))
    coaddType = DatasetType(name="deepCoadd_calexp",
                            dimensions=skymapDims,
                            storageClass=storageClass)
    registry.registerDatasetType(coaddType)

    # Query over the union of both dataset types' required dimensions.
    combined = calexpType.dimensions.required | coaddType.dimensions.required
    dimensions = DimensionGraph(registry.dimensions, dimensions=combined)

    # Without data this should run OK but return an empty set.
    rows = list(registry.queryDimensions(dimensions, datasets={calexpType: [collection]}))
    self.assertEqual(len(rows), 0)

801 

def testCalibrationLabelIndirection(self):
    """Test that we can look up datasets with calibration_label dimensions
    from a data ID with exposure dimensions.
    """
    registry = self.makeRegistry()

    # A calibration dataset type whose dimensions include
    # calibration_label rather than exposure.
    flat = DatasetType(
        "flat",
        registry.dimensions.extract(
            ["instrument", "detector", "physical_filter", "calibration_label"]
        ),
        "ImageU"
    )
    registry.registerDatasetType(flat)

    # Dimension records: one instrument/filter, five detectors, two
    # visits, and one exposure per visit on consecutive nights.
    registry.insertDimensionData("instrument", dict(name="DummyCam"))
    registry.insertDimensionData(
        "physical_filter",
        dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
    )
    registry.insertDimensionData(
        "detector",
        *[dict(instrument="DummyCam", id=n, full_name=str(n)) for n in (1, 2, 3, 4, 5)]
    )
    registry.insertDimensionData(
        "visit",
        dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
        dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_i"),
    )
    registry.insertDimensionData(
        "exposure",
        dict(instrument="DummyCam", id=100, name="100", visit=10, physical_filter="dummy_i",
             datetime_begin=datetime(2005, 12, 15, 2), datetime_end=datetime(2005, 12, 15, 3)),
        dict(instrument="DummyCam", id=101, name="101", visit=11, physical_filter="dummy_i",
             datetime_begin=datetime(2005, 12, 16, 2), datetime_end=datetime(2005, 12, 16, 3)),
    )
    # Three validity ranges: each night separately and both together.
    registry.insertDimensionData(
        "calibration_label",
        dict(instrument="DummyCam", name="first_night",
             datetime_begin=datetime(2005, 12, 15, 1), datetime_end=datetime(2005, 12, 15, 4)),
        dict(instrument="DummyCam", name="second_night",
             datetime_begin=datetime(2005, 12, 16, 1), datetime_end=datetime(2005, 12, 16, 4)),
        dict(instrument="DummyCam", name="both_nights",
             datetime_begin=datetime(2005, 12, 15, 1), datetime_end=datetime(2005, 12, 16, 4)),
    )

    def insertFlat(label, detectorId, runName):
        # Insert one flat for the given calibration label, detector, and
        # run collection.
        registry.insertDatasets(flat,
                                [dict(instrument="DummyCam", calibration_label=label,
                                      physical_filter="dummy_i", detector=detectorId)],
                                run=runName)

    # Different flats for different nights for detectors 1-3 in first
    # collection.
    run1 = "calibs1"
    registry.registerRun(run1)
    for detectorId in (1, 2, 3):
        insertFlat("first_night", detectorId, run1)
        insertFlat("second_night", detectorId, run1)
    # The same flat for both nights for detectors 3-5 (so detector 3 has
    # multiple valid flats) in second collection.
    run2 = "calib2"
    registry.registerRun(run2)
    for detectorId in (3, 4, 5):
        insertFlat("both_nights", detectorId, run2)

    def countFlats(collections, exposureId, detectorId):
        # Number of flats found for one exposure+detector combination in
        # the given collections.
        found = registry.queryDatasets("flat", collections=collections,
                                       instrument="DummyCam",
                                       exposure=exposureId,
                                       detector=detectorId)
        return len(list(found))

    # Perform queries for individual exposure+detector combinations, which
    # should always return exactly one flat — except detector 3 with both
    # collections, which matches one flat from each.
    for exposureId in (100, 101):
        for detectorId in (1, 2, 3):
            with self.subTest(exposure=exposureId, detector=detectorId):
                self.assertEqual(countFlats([run1], exposureId, detectorId), 1)
        for detectorId in (3, 4, 5):
            with self.subTest(exposure=exposureId, detector=detectorId):
                self.assertEqual(countFlats([run2], exposureId, detectorId), 1)
        for detectorId in (1, 2, 4, 5):
            with self.subTest(exposure=exposureId, detector=detectorId):
                self.assertEqual(countFlats([run1, run2], exposureId, detectorId), 1)
        for detectorId in (3,):
            with self.subTest(exposure=exposureId, detector=detectorId):
                self.assertEqual(countFlats([run1, run2], exposureId, detectorId), 2)

896 

def testAbstractFilterQuery(self):
    """Test that we can run a query that just lists the known
    abstract_filters.  This is tricky because abstract_filter is
    backed by a query against physical_filter.
    """
    registry = self.makeRegistry()
    registry.insertDimensionData("instrument", dict(name="DummyCam"))
    # Three physical filters mapping onto only two abstract filters.
    registry.insertDimensionData(
        "physical_filter",
        dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        dict(instrument="DummyCam", name="dummy_i2", abstract_filter="i"),
        dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
    )
    # The query should de-duplicate down to the two distinct
    # abstract_filter values.
    expected = [
        DataCoordinate.standardize(abstract_filter=band, universe=registry.dimensions)
        for band in ("i", "r")
    ]
    rows = list(registry.queryDimensions(["abstract_filter"]))
    self.assertCountEqual(rows, expected)